Automatically generated by Mendeley Desktop 1.17.8
Any changes to this file will be lost if it is regenerated by Mendeley.
BibTeX export options can be customized via Preferences -> BibTeX in Mendeley Desktop
@inproceedings{calandriello2016pack,
abstract = {Most kernel-based methods, such as kernel regression, kernel PCA, ICA, or k-means clustering, do not scale to large datasets, because constructing and storing the kernel matrix $K_n$ requires at least $O(n^2)$ time and space for $n$ samples. Recent works (Alaoui 2014, Musco 2016) show that sampling points with replacement according to their ridge leverage scores (RLS) generates small dictionaries of relevant points with strong spectral approximation guarantees for $K_n$. The drawback of RLS-based methods is that computing exact RLS requires constructing and storing the whole kernel matrix. In this paper, we introduce SQUEAK, a new algorithm for kernel approximation based on RLS sampling that sequentially processes the dataset, storing a dictionary which creates accurate kernel matrix approximations with a number of points that only depends on the effective dimension $d_{\mathrm{eff}}(\gamma)$ of the dataset. Moreover, since all the RLS estimations are efficiently performed using only the small dictionary, SQUEAK never constructs the whole matrix $K_n$, runs in linear time $\widetilde{O}(n\,d_{\mathrm{eff}}(\gamma)^3)$ w.r.t. $n$, and requires only a single pass over the dataset.},
author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal},
booktitle = {Adaptive and Scalable Nonparametric Methods in Machine Learning at Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2016 - Pack only the essentials Adaptive dictionary learning for kernel ridge regression.pdf:pdf},
title = {Pack only the essentials: Adaptive dictionary learning for kernel ridge regression},
year = {2016}
}
@inproceedings{LiJeSr16,
abstract = {In this note we consider sampling from (non-homogeneous) strongly Rayleigh probability measures. As an important corollary, we obtain a fast mixing Markov Chain sampler for Determinantal Point Processes.},
archivePrefix = {arXiv},
arxivId = {1607.03559},
author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit},
booktitle = {Neural Information Processing Systems},
eprint = {1607.03559},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Li, Jegelka, Sra - 2016 - Fast Sampling for Strongly Rayleigh Measures with Application to Determinantal Point Processes.pdf:pdf},
title = {Fast Sampling for Strongly {Rayleigh} Measures with Application to Determinantal Point Processes},
year = {2016}
}
@article{Straszak2016,
abstract = {A determinantal point process (DPP) over a universe {\$}\backslash{\{}1,\backslashldots,m\backslash{\}}{\$} with respect to an {\$}m \backslashtimes m{\$} positive semidefinite matrix {\$}L{\$} is a probability distribution where the probability of a subset {\$}S \backslashsubseteq \backslash{\{}1,\backslashldots,m\backslash{\}}{\$} is proportional to the determinant of the principal minor of {\$}L{\$} corresponding to {\$}S.{\$} DPPs encapsulate a wide variety of known distributions and appear naturally (and surprisingly) in a wide variety of areas such as physics, mathematics and computer science. Several applications that use DPPs rely on the fact that they are computationally tractable -- i.e., there are algorithms for sampling from DPPs efficiently. Recently, there is growing interest in studying a generalization of DPPs in which the support of the distribution is a restricted family B of subsets of {\$}\backslash{\{}1,2,\backslashldots, m\backslash{\}}{\$}. Mathematically, these distributions, which we call generalized DPPs, include the well-studied hardcore distributions as special cases (when {\$}L{\$} is diagonal). In applications, they can be used to refine models based on DPPs by imposing combinatorial constraints on the support of the distribution. In this paper we take first steps in a systematic study of computational questions concerning generalized DPPs. We introduce a natural class of linear families: roughly, a family B is said to be linear if there is a collection of {\$}p{\$} linear forms that all elements of B satisfy. Important special cases of linear families are all sets of cardinality {\$}k{\$} -- giving rise to {\$}k{\$}-DPPs -- and, more generally, partition matroids. On the positive side, we prove that, when {\$}p{\$} is a constant, there is an efficient, exact sampling algorithm for linear DPPs. 
We complement these results by proving that, when {\$}p{\$} is large, the computational problem related to such DPPs becomes {\$}\backslash{\#}{\$}P-hard. Our proof techniques rely and build on the interplay between polynomials and probability distributions.},
archivePrefix = {arXiv},
arxivId = {1608.00554},
author = {Straszak, Damian and Vishnoi, Nisheeth K.},
eprint = {1608.00554},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Straszak, Vishnoi - 2016 - Generalized Determinantal Point Processes The Linear Case.pdf:pdf},
month = aug,
title = {Generalized Determinantal Point Processes: The Linear Case},
url = {http://arxiv.org/abs/1608.00554},
year = {2016}
}
@article{Li2016,
abstract = {The Nystr{\"o}m method has long been popular for scaling up kernel methods. Its theoretical guarantees and empirical performance rely critically on the quality of the landmarks selected. We study landmark selection for Nystr{\"o}m using Determinantal Point Processes (DPPs), discrete probability models that allow tractable generation of diverse samples. We prove that landmarks selected via DPPs guarantee bounds on approximation errors; subsequently, we analyze implications for kernel ridge regression. Contrary to prior reservations due to cubic complexity of DPP sampling, we show that (under certain conditions) Markov chain DPP sampling requires only linear time in the size of the data. We present several empirical results that support our theoretical analysis, and demonstrate the superior performance of DPP-based landmark selection compared with existing approaches.},
archivePrefix = {arXiv},
arxivId = {1603.06052},
author = {Li, Chengtao and Jegelka, Stefanie and Sra, Suvrit},
eprint = {1603.06052},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Li, Jegelka, Sra - 2016 - Fast DPP Sampling for Nystrom with Application to Kernel Methods.pdf:pdf},
month = mar,
title = {Fast {DPP} Sampling for {Nystr{\"o}m} with Application to Kernel Methods},
url = {http://arxiv.org/abs/1603.06052},
year = {2016}
}
@article{Gong2014,
abstract = {Video summarization is a challenging problem with great application potential. Whereas prior approaches, largely unsupervised in nature, focus on sampling use-ful frames and assembling them as summaries, we consider video summarization as a supervised subset selection problem. Our idea is to teach the system to learn from human-created summaries how to select informative and diverse subsets, so as to best meet evaluation metrics derived from human-perceived quality. To this end, we propose the sequential determinantal point process (seqDPP), a proba-bilistic model for diverse sequential subset selection. Our novel seqDPP heeds the inherent sequential structures in video data, thus overcoming the deficiency of the standard DPP, which treats video frames as randomly permutable items. Mean-while, seqDPP retains the power of modeling diverse subsets, essential for summa-rization. Our extensive results of summarizing videos from 3 datasets demonstrate the superior performance of our method, compared to not only existing unsuper-vised methods but also naive applications of the standard DPP model.},
author = {Gong, Boqing and Chao, Wei-Lun and Grauman, Kristen and Sha, Fei},
issn = {10495258},
journal = {Advances in Neural Information Processing Systems},
keywords = {diverse sequential subset selection,supervised video summarization},
pages = {1--9},
title = {Diverse Sequential Subset Selection for Supervised Video Summarization},
url = {http://papers.nips.cc/paper/5413-diverse-sequential-subset-selection-for-supervised-video-summarization},
year = {2014}
}
@article{Kathuria2016a,
abstract = {Gaussian Process bandit optimization has emerged as a powerful tool for optimizing noisy black box functions. One example in machine learning is hyper-parameter optimization where each evaluation of the target function requires training a model which may involve days or even weeks of computation. Most methods for this so-called "Bayesian optimization" only allow sequential exploration of the parameter space. However, it is often desirable to propose batches or sets of parameter values to explore simultaneously, especially when there are large parallel processing facilities at our disposal. Batch methods require modeling the interaction between the different evaluations in the batch, which can be expensive in complex scenarios. In this paper, we propose a new approach for parallelizing Bayesian optimization by modeling the diversity of a batch via Determinantal point processes (DPPs) whose kernels are learned automatically. This allows us to generalize a previous result as well as prove better regret bounds based on DPP sampling. Our experiments on a variety of synthetic and real-world robotics and hyper-parameter optimization tasks indicate that our DPP-based methods, especially those based on DPP sampling, outperform state-of-the-art methods.},
archivePrefix = {arXiv},
arxivId = {1611.04088},
author = {Kathuria, Tarun and Deshpande, Amit and Kohli, Pushmeet},
eprint = {1611.04088},
journal = {Neural Information Processing Systems},
month = nov,
title = {Batched {Gaussian} Process Bandit Optimization via Determinantal Point Processes},
url = {http://arxiv.org/abs/1611.04088},
year = {2016}
}
@article{Agarwal2014,
abstract = {In this paper, we compare three initialization schemes for the KMEANS clustering algorithm: 1) random initialization (KMEANSRAND), 2) KMEANS++, and 3) KMEANSD++. Both KMEANSRAND and KMEANS++ have a major drawback that the value of k needs to be set by the user of the algorithms. (Kang 2013) recently proposed a novel use of determinantal point processes for sampling the initial centroids for the KMEANS algorithm (we call it KMEANSD++). They, however, do not provide any evaluation establishing that KMEANSD++ is better than other algorithms. In this paper, we show that the performance of KMEANSD++ is comparable to KMEANS++ (both of which are better than KMEANSRAND) with KMEANSD++ having an additional advantage that it can automatically approximate the value of k.},
archivePrefix = {arXiv},
arxivId = {1410.6975},
author = {Agarwal, Apoorv and Choromanska, Anna and Choromanski, Krzysztof},
eprint = {1410.6975},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Agarwal, Choromanska, Choromanski - 2014 - Notes on using Determinantal Point Processes for Clustering with Applications to Text Cluster.pdf:pdf},
month = oct,
title = {Notes on using Determinantal Point Processes for Clustering with Applications to Text Clustering},
url = {http://arxiv.org/abs/1410.6975},
year = {2014}
}
@article{Bardenet2016,
abstract = {We show that repulsive random variables can yield Monte Carlo methods with faster convergence rates than the typical {\$}N{\^{}}{\{}-1/2{\}}{\$}, where {\$}N{\$} is the number of integrand evaluations. More precisely, we propose stochastic numerical quadratures involving determinantal point processes associated with multivariate orthogonal polynomials, and we obtain root mean square errors that decrease as {\$}N{\^{}}{\{}-(1+1/d)/2{\}}{\$}, where {\$}d{\$} is the dimension of the ambient space. First, we prove a central limit theorem (CLT) for the linear statistics of a class of determinantal point processes, when the reference measure is a product measure supported on a hypercube, which satisfies the Nevai-class regularity condition, a result which may be of independent interest. Next, we introduce a Monte Carlo method based on these determinantal point processes, and prove a CLT with explicit limiting variance for the quadrature error, when the reference measure satisfies a stronger regularity condition. As a corollary, by taking a specific reference measure and using a construction similar to importance sampling, we obtain a general Monte Carlo method, which applies to any measure with continuously derivable density. Loosely speaking, our method can be interpreted as a stochastic counterpart to Gaussian quadrature, which, at the price of some convergence rate, is easily generalizable to any dimension and has a more explicit error term.},
archivePrefix = {arXiv},
arxivId = {1605.00361},
author = {Bardenet, R{\'{e}}mi and Hardy, Adrien},
eprint = {1605.00361},
file = {::},
month = may,
pages = {48},
title = {{Monte Carlo} with Determinantal Point Processes},
url = {http://arxiv.org/abs/1605.00361},
year = {2016}
}
@article{DuBa16,
abstract = {We propose a new class of determinantal point processes (DPPs) which can be manipulated for inference and parameter learning in potentially sublinear time in the number of items. This class, based on a specific low-rank factorization of the marginal kernel, is particularly suited to a subclass of continuous DPPs and DPPs defined on exponentially many items. We apply this new class to modelling text documents as sampling a DPP of sentences, and propose a conditional maximum likelihood formulation to model topic proportions, which is made possible with no approximation for our class of DPPs. We present an application to document summarization with a DPP on {\$}2{\^{}}{\{}500{\}}{\$} items.},
archivePrefix = {arXiv},
arxivId = {1610.05925},
author = {Dupuy, Christophe and Bach, Francis},
eprint = {1610.05925},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Dupuy, Bach - 2016 - Learning Determinantal Point Processes in Sublinear Time.pdf:pdf},
journal = {arXiv preprint arXiv:1610.05925},
month = oct,
title = {Learning Determinantal Point Processes in Sublinear Time},
url = {http://arxiv.org/abs/1610.05925},
year = {2016}
}
@article{Gillenwater2014,
abstract = {A determinantal point process (DPP) is a probabilistic model of set diversity compactly parameterized by a positive semi-definite kernel matrix. To fit a DPP to a given task, we would like to learn the entries of its kernel matrix by maximizing the log-likelihood of the available data. However, log-likelihood is non-convex in the entries of the kernel matrix, and this learning problem is conjectured to be NP-hard. Thus, previous work has instead focused on more restricted convex learning settings: learning only a single weight for each row of the kernel matrix, or learning weights for a linear combination of DPPs with fixed kernel matrices. In this work we propose a novel algorithm for learning the full kernel matrix. By changing the kernel parameterization from matrix entries to eigenvalues and eigenvectors, and then lower-bounding the likelihood in the manner of expectation-maximization algorithms, we obtain an effective optimization procedure. We test our method on a real-world product recommendation task, and achieve relative gains of up to 16.5{\%} in test log-likelihood compared to the naive approach of maximizing likelihood by projected gradient ascent on the entries of the kernel matrix.},
archivePrefix = {arXiv},
arxivId = {1411.1088},
author = {Gillenwater, Jennifer A and Kulesza, Alex and Fox, Emily and Taskar, Ben},
eprint = {1411.1088},
journal = {Advances in Neural Information Processing Systems},
pages = {3149--3157},
title = {{Expectation-Maximization} for Learning Determinantal Point Processes},
url = {http://papers.nips.cc/paper/5564-expectation-maximization-for-learning-determinantal-point-processes},
year = {2014}
}
@article{affandi2013nystrom,
abstract = {Determinantal point processes (DPPs) are appealing models for subset selection problems where diversity is desired. They offer surprisingly efficient inference, including sampling in $O(N^3)$ time and $O(N^2)$ space, where N is the number of base items. However, in some applications, N may grow so large that sampling from a DPP becomes computationally infeasible. This is especially true in settings where the DPP kernel matrix cannot be represented by a linear decomposition of low-dimensional feature vectors. In these cases, we propose applying the Nystr{\"o}m approximation to project the kernel matrix into a low-dimensional space. While theoretical guarantees for the Nystr{\"o}m approximation in terms of standard matrix norms have been previously established, we are concerned with probabilistic measures, like total variation distance between the DPP and its Nystr{\"o}m approximation, that behave quite differently. In this paper we derive new error bounds for the Nystr{\"o}m-approximated DPP and present empirical results to corroborate them. We then demonstrate the Nystr{\"o}m-approximated DPP by applying it to a motion capture summarization task.},
author = {Affandi, Raja Hafiz and Kulesza, Alex and Fox, Emily B and Taskar, Ben},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Affandi et al. - Unknown - Nystr{\"{o}}m Approximation for Large-Scale Determinantal Processes.pdf:pdf},
issn = {15337928},
journal = {International Conference on Artificial Intelligence and Statistics},
pages = {85--98},
title = {{Nystr{\"o}m} Approximation for Large-Scale Determinantal Processes},
volume = {31},
year = {2013}
}
@article{HKPV06,
abstract = {We give a probabilistic introduction to determinantal and permanental point processes. Determinantal processes arise in physics (fermions, eigenvalues of random matrices) and in combinatorics (nonintersecting paths, random spanning trees). They have the striking property that the number of points in a region {\$}D{\$} is a sum of independent Bernoulli random variables, with parameters which are eigenvalues of the relevant operator on {\$}L{\^{}}2(D){\$}. Moreover, any determinantal process can be represented as a mixture of determinantal projection processes. We give a simple explanation for these known facts, and establish analogous representations for permanental processes, with geometric variables replacing the Bernoulli variables. These representations lead to simple proofs of existence criteria and central limit theorems, and unify known results on the distribution of absolute values in certain processes with radially symmetric distributions.},
archivePrefix = {arXiv},
arxivId = {math/0503110},
author = {Hough, J. Ben and Krishnapur, Manjunath and Peres, Yuval and Vir{\'{a}}g, B{\'{a}}lint},
doi = {10.1214/154957806000000078},
eprint = {math/0503110},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Hough et al. - 2006 - Determinantal Processes and Independence(2).pdf:pdf},
issn = {1549-5787},
journal = {Probability Surveys},
pages = {206--229},
primaryClass = {math},
title = {Determinantal Processes and Independence},
url = {http://arxiv.org/abs/math/0503110},
volume = {3},
year = {2006}
}
@article{kulesza2011kdpps,
abstract = {Determinantal point processes (DPPs) have recently been proposed as models for set selection problems where diversity is preferred. For example, they can be used to select diverse sets of sentences to form document summaries, or to find multiple non-overlapping human ...},
author = {Kulesza, Alex and Taskar, Ben},
journal = {International Conference on Machine Learning},
pages = {1193--1200},
title = {{k-DPPs}: Fixed-Size Determinantal Point Processes},
year = {2011}
}
@article{LaMoRu15,
abstract = {Statistical models and methods for determinantal point processes (DPPs) seem largely unexplored. We demonstrate that DPPs provide useful models for the description of spatial point pattern datasets where nearby points repel each other. Such data are usually modelled by Gibbs point processes, where the likelihood and moment expressions are intractable and simulations are time consuming. We exploit the appealing probabilistic properties of DPPs to develop parametric models, where the likelihood and moment expressions can be easily evaluated and realizations can be quickly simulated. We discuss how statistical inference is conducted using the likelihood or moment properties of DPP models, and we provide freely available software for simulation and statistical inference.},
archivePrefix = {arXiv},
arxivId = {1205.4818},
author = {Lavancier, Fr{\'{e}}d{\'{e}}ric and M{\o}ller, Jesper and Rubak, Ege},
doi = {10.1111/rssb.12096},
eprint = {1205.4818},
file = {::},
issn = {14679868},
journal = {Journal of the Royal Statistical Society. Series B: Statistical Methodology},
keywords = {Maximum-likelihood-based inference,Point process density,Product densities,Repulsiveness,Simulation,Spectral approach},
number = {4},
pages = {853--877},
title = {Determinantal point process models and statistical inference},
volume = {77},
year = {2015}
}
@article{Affandi2014a,
abstract = {Determinantal point processes (DPPs) are well- suited for modeling repulsion and have proven useful in applications where diversity is desired. While DPPs have many appealing properties, learning the parameters of a DPP is difficult, as the likelihood is non-convex and is infeasible to compute in many scenarios. Here we propose Bayesian methods for learning the DPP kernel parameters. These methods are applicable in large- scale discrete and continuous DPP settings, even when the likelihood can only be bounded. We demonstrate the utility of our DPP learning methods in studying the progression of diabetic neuropathy based on the spatial distribution of nerve fibers, and in studying human perception of diversity in images.},
archivePrefix = {arXiv},
arxivId = {1402.4862},
author = {Affandi, Raja Hafiz and Fox, Emily B. and Adams, Ryan P. and Taskar, Ben},
eprint = {1402.4862},
isbn = {9781634393973},
journal = {International Conference on Machine Learning},
number = {1},
pages = {2967--2981},
title = {Learning the parameters of determinantal point process kernels},
url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-84919798123{\&}partnerID=tZOtx3y1},
volume = {4},
year = {2014}
}
@article{Evans2009,
abstract = {In this paper, we use a partition of the links of a network in order to uncover its community structure. This approach allows for communities to overlap at nodes, so that nodes may be in more than one community. We do this by making a node partition of the line graph of the original network. In this way we show that any algorithm which produces a partition of nodes can be used to produce a partition of links. We discuss the role of the degree heterogeneity and propose a weighted version of the line graph in order to account for this.},
archivePrefix = {arXiv},
arxivId = {0903.2181},
author = {Evans, T. S. and Lambiotte, R.},
doi = {10.1103/PhysRevE.80.016105},
eprint = {0903.2181},
file = {::},
issn = {1539-3755},
journal = {Physical Review E},
month = jul,
number = {1},
pages = {016105},
title = {Line graphs, link partitions, and overlapping communities},
url = {http://arxiv.org/abs/0903.2181},
volume = {80},
year = {2009}
}
@inproceedings{Gartrell2016,
abstract = {Determinantal point processes (DPPs) have garnered attention as an elegant probabilistic model of set diversity. They are useful for a number of subset selection tasks, including product recommendation. DPPs are parametrized by a positive semi-definite kernel matrix. In this work we present a new method for learning the DPP kernel from observed data using a low-rank factorization of this kernel. We show that this low-rank factorization enables a learning algorithm that is nearly an order of magnitude faster than previous approaches, while also providing for a method for computing product recommendation predictions that is far faster (up to 20x faster or more for large item catalogs) than previous techniques that involve a full-rank DPP kernel. Furthermore, we show that our method provides equivalent or sometimes better predictive performance than prior full-rank DPP approaches, and better performance than several other competing recommendation methods in many cases. We conduct an extensive experimental evaluation using several real-world datasets in the domain of product recommendation to demonstrate the utility of our method, along with its limitations.},
archivePrefix = {arXiv},
arxivId = {1602.05436},
author = {Gartrell, Mike and Paquet, Ulrich and Koenigstein, Noam},
booktitle = {AAAI Conference on Artificial Intelligence},
eprint = {1602.05436},
file = {::},
month = feb,
title = {Low-Rank Factorization of Determinantal Point Processes for Recommendation},
url = {http://arxiv.org/abs/1602.05436},
year = {2017}
}
@article{KuTa12,
abstract = {Determinantal point processes (DPPs) are elegant probabilistic models of repulsion that arise in quantum physics and random matrix theory. In contrast to traditional structured models like Markov random fields, which become intractable and hard to approximate in the presence of negative correlations, DPPs offer efficient and exact algorithms for sampling, marginalization, conditioning, and other inference tasks. We provide a gentle introduction to DPPs, focusing on the intuitions, algorithms, and extensions that are most relevant to the machine learning community, and show how DPPs can be applied to real-world applications like finding diverse sets of high-quality search results, building informative summaries by selecting diverse sentences from documents, modeling non-overlapping human poses in images or video, and automatically building timelines of important news stories.},
archivePrefix = {arXiv},
arxivId = {1207.6083},
author = {Kulesza, Alex and Taskar, Ben},
doi = {10.1561/2200000044},
eprint = {1207.6083},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Kulesza, Taskar - 2012 - Determinantal Point Processes for Machine Learning(2).pdf:pdf},
issn = {1935-8237},
journal = {Foundations and Trends{\textregistered} in Machine Learning},
month = jul,
number = {2-3},
pages = {123--286},
title = {Determinantal Point Processes for Machine Learning},
url = {http://arxiv.org/abs/1207.6083},
volume = {5},
year = {2012}
}
@article{Lin2012,
abstract = {We introduce a method to learn a mixture of submodular “shells” in a large-margin setting. A submodular shell is an abstract submodular function that can be instantiated with a ground set and a set of parameters to produce a submodular function. A mixture of such shells can then also be so instantiated to produce a more complex submodular function. What our algorithm learns are the mixture weights over such shells. We provide a risk bound guarantee when learning in a large-margin structured-prediction setting using a projected subgradient method when only approximate submodular optimization is possible (such as with submodular function maximization). We apply this method to the problem of multi-document summarization and produce the best results reported so far on the widely used NIST DUC-05 through DUC-07 document summarization corpora.},
author = {Lin, Hui and Bilmes, Jeff A.},
isbn = {9780974903989},
journal = {Uncertainty in Artificial Intelligence},
title = {Learning mixtures of submodular shells with application to document summarization},
url = {http://arxiv.org/abs/1210.4871},
year = {2012}
}
@article{EvLa10,
archivePrefix = {arXiv},
arxivId = {0912.4389},
author = {Evans, T S and Lambiotte, R},
doi = {10.1140/epjb/e2010-00261-8},
eprint = {0912.4389},
file = {::},
keywords = {community detection,edge partition,line graphs,overlapping communities,vertex cover,random walks and Levy flights,structures and organization in complex systems,networks and genealogical trees},
pages = {1--8},
title = {Line Graphs of Weighted Networks for Overlapping Communities},
journal = {The European Physical Journal B},
year = {2010}
}
@article{Mariet2016,
abstract = {Determinantal Point Processes (DPPs) are probabilistic models over all subsets a ground set of {\$}N{\$} items. They have recently gained prominence in several applications that rely on "diverse" subsets. However, their applicability to large problems is still limited due to the {\$}\backslashmathcal O(N{\^{}}3){\$} complexity of core tasks such as sampling and learning. We enable efficient sampling and learning for DPPs by introducing KronDPP, a DPP model whose kernel matrix decomposes as a tensor product of multiple smaller kernel matrices. This decomposition immediately enables fast exact sampling. But contrary to what one may expect, leveraging the Kronecker product structure for speeding up DPP learning turns out to be more difficult. We overcome this challenge, and derive batch and stochastic optimization algorithms for efficiently learning the parameters of a KronDPP.},
archivePrefix = {arXiv},
arxivId = {1605.08374},
author = {Mariet, Zelda and Sra, Suvrit},
eprint = {1605.08374},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Mariet, Sra - 2016 - Kronecker Determinantal Point Processes.pdf:pdf},
month = may,
title = {{Kronecker} Determinantal Point Processes},
url = {http://arxiv.org/abs/1605.08374},
year = {2016}
}
@article{Mariet2015,
abstract = {Determinantal point processes (DPPs) offer an elegant tool for encoding probabilities over subsets of a ground set. Discrete DPPs are parametrized by a positive semidefinite matrix (called the DPP kernel), and estimating this kernel is key to learning DPPs from observed data. We consider the task of learning the DPP kernel, and develop for it a surprisingly simple yet effective new algorithm. Our algorithm offers the following benefits over previous approaches: (a) it is much simpler; (b) it yields equally good and sometimes even better local maxima; and (c) it runs an order of magnitude faster on large problems. We present experimental results on both real and simulated data to illustrate the numerical performance of our technique.},
archivePrefix = {arXiv},
arxivId = {1508.00792},
author = {Mariet, Zelda and Sra, Suvrit},
eprint = {1508.00792},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Mariet, Sra - 2015 - Fixed-point algorithms for learning determinantal point processes.pdf:pdf},
month = aug,
title = {Fixed-point algorithms for learning determinantal point processes},
url = {http://arxiv.org/abs/1508.00792},
year = {2015}
}
@inproceedings{gillenwater2012discovering,
abstract = {We propose a novel probabilistic technique for modeling and extracting salient structure from large document collections. As in clustering and topic modeling, our goal is to provide an organizing perspective into otherwise overwhelming amounts of information. We are particularly interested in revealing and exploiting relationships between documents. To this end, we focus on extracting diverse sets of threads---singly-linked, coherent chains of important documents. To illustrate, we extract research threads from citation graphs and construct timelines from news articles. Our method is highly scalable, running on a corpus of over 30 million words in about four minutes, more than 75 times faster than a dynamic topic model. Finally, the results from our model more closely resemble human news summaries according to several metrics and are also preferred by human judges.},
author = {Gillenwater, Jennifer and Kulesza, Alex and Taskar, Ben},
booktitle = {Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
file = {::},
pages = {710--720},
title = {Discovering Diverse and Salient Threads in Document Collections},
year = {2012}
}
@article{BaTi15,
abstract = {Determinantal point processes (DPPs) are point process models that naturally encode diversity between the points of a given realization, through a positive definite kernel {\$}K{\$}. DPPs possess desirable properties, such as exact sampling or analyticity of the moments, but learning the parameters of kernel {\$}K{\$} through likelihood-based inference is not straightforward. First, the kernel that appears in the likelihood is not {\$}K{\$}, but another kernel {\$}L{\$} related to {\$}K{\$} through an often intractable spectral decomposition. This issue is typically bypassed in machine learning by directly parametrizing the kernel {\$}L{\$}, at the price of some interpretability of the model parameters. We follow this approach here. Second, the likelihood has an intractable normalizing constant, which takes the form of a large determinant in the case of a DPP over a finite set of objects, and the form of a Fredholm determinant in the case of a DPP over a continuous domain. Our main contribution is to derive bounds on the likelihood of a DPP, both for finite and continuous domains. Unlike previous work, our bounds are cheap to evaluate since they do not rely on approximating the spectrum of a large matrix or an operator. Through usual arguments, these bounds thus yield cheap variational inference and moderately expensive exact Markov chain Monte Carlo inference methods for DPPs.},
archivePrefix = {arXiv},
arxivId = {1507.01154},
author = {Bardenet, R{\'{e}}mi and Titsias, Michalis K.},
eprint = {1507.01154},
file = {::},
month = {jul},
title = {{Inference for determinantal point processes without spectral knowledge}},
url = {http://arxiv.org/abs/1507.01154},
year = {2015}
}
@inproceedings{abbeel2004apprenticeship,
author = {Abbeel, Pieter and Ng, Andrew},
booktitle = {Proceedings of the 21st International Conference on Machine Learning},
doi = {10.1145/1015330.1015430},
isbn = {1-58113-838-5},
keywords = {irl},
mendeley-tags = {irl},
title = {{Apprenticeship learning via inverse reinforcement learning}},
url = {http://www.eecs.harvard.edu/{~}parkes/cs286r/spring06/papers/abeelng.pdf},
year = {2004}
}
@article{ross2010reduction,
abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.},
author = {Ross, Stephane and Gordon, Geoffrey J and Bagnell, J Andrew},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ross, Gordon, Bagnell - 2010 - A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning.pdf:pdf},
journal = {AISTATS},
pages = {627--635},
title = {{A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning}},
url = {http://arxiv.org/abs/1011.0686},
volume = {15},
year = {2010}
}
@inproceedings{bagnell2010efficient,
abstract = {Imitation Learning, while applied successfully on many large real-world$\backslash$nproblems, is typically addressed as a standard supervised learning$\backslash$nproblem, where it is assumed the training and testing data are i.i.d..$\backslash$nThis is not true in imitation learning as the learned policy influences$\backslash$nthe future test inputs (states) upon which it will be tested. We$\backslash$nshow that this leads to compounding errors and a regret bound that$\backslash$ngrows quadratically in the time horizon of the task. We propose two$\backslash$nalternative algorithms for imitation learning where training occurs$\backslash$nover several episodes of interaction. These two approaches share$\backslash$nin common that the learner's policy is slowly modified from executing$\backslash$nthe expert's policy to the learned policy. We show that this leads$\backslash$nto stronger performance guarantees and demonstrate the improved performance$\backslash$non two challenging problems: training a learner to play 1) a 3D racing$\backslash$ngame (Super Tux Kart) and 2) Mario Bros.; given input images from$\backslash$nthe games and corresponding actions taken by a human expert and near-optimal$\backslash$nplanner respectively.},
author = {Bagnell, J Andrew and Ross, St{\'{e}}phane},
booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS) 2010},
pages = {661--668},
title = {{Efficient Reductions for Imitation Learning}},
volume = {9},
year = {2010}
}
@inproceedings{judah2012active,
author = {Judah, Kshitij and Fern, Alan Paul and Dietterich, Thomas Glenn},
booktitle = {AAAI Fall Symposium: Robots Learning Interactively from Human Teachers},
keywords = {dblp},
publisher = {AAAI},
series = {AAAI Technical Report},
title = {{Active Imitation Learning via Reduction to I.I.D. Active Learning.}},
url = {http://dblp.uni-trier.de/db/conf/aaaifs/aaaifs2012-07.html{\#}JudahFD12},
volume = {FS-12-07},
year = {2012}
}
@inproceedings{CSS10,
author = {Cesa-Bianchi, Nicol{\`{o}} and Shalev-Shwartz, Shai and Shamir, Ohad},
booktitle = {COLT},
pages = {218--231},
title = {{Online learning of noisy data with kernels}},
year = {2010}
}
@article{ziebart2008maximum,
abstract = {Recent research has shown the benefit of framing problems of imitation learning as solutions to Markov Decision Problems. This approach reduces the problem of learning to recovering a utility function that makes the behavior induced by a near-optimal policy closely mimic demonstrated behavior. In this work, we develop a probabilistic approach based on the principle of maximum entropy. Our approach provides a well-defined, globally normalized distribution over decisions, while providing the same performance guarantees as existing methods.We develop our technique in the context of modeling real-world navigation and driving behaviors where collected data is inherently noisy and imperfect. Our probabilistic approach enables modeling of route preferences as well as a powerful new approach to inferring destinations and routes based on partial trajectories.},
author = {Ziebart, Brian and Maas, Andrew and Bagnell, J Andrew and Dey, Anind K},
editor = {Archer, M},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ziebart et al. - 2008 - Maximum Entropy Inverse Reinforcement Learning.pdf:pdf},
isbn = {9781577353683},
journal = {Proceedings of the AAAI Conference on Artificial Intelligence},
keywords = {irl},
mendeley-tags = {irl},
publisher = {AAAI Press},
title = {{Maximum Entropy Inverse Reinforcement Learning}},
url = {http://www.aaai.org/Papers/AAAI/2008/AAAI08-227.pdf},
year = {2008}
}
@inproceedings{valko2012semi-supervised,
abstract = {In apprenticeship learning we aim to learn a good policy by observing the behavior of an expert or a set of experts. In particular, we consider the case where the expert acts so as to maximize an unknown reward function defined as a linear combination of a set of state features. In this paper, we consider the setting where we observe many sample trajectories (i.e., sequences of states) but only one or a few of them are labeled as experts' trajectories. We investigate the conditions under which the remaining unlabeled trajectories can help in learning a policy with a good performance. In particular, we define an extension to the max-margin inverse reinforcement learning proposed by Abbeel and Ng (2004) where, at each iteration, the max-margin optimization step is replaced by a semi-supervised optimization problem which favors classifiers separating clusters of trajectories. Finally, we report empirical results on two grid-world domains showing that the semi-supervised algorithm is able to output a better policy in fewer iterations than the related algorithm that does not take the unlabeled trajectories into account.},
author = {Valko, Michal and Ghavamzadeh, Mohammad and Lazaric, Alessandro},
booktitle = {Proceedings of the 10th European Workshop on Reinforcement Learning},
month = {jun},
pages = {131--241},
publisher = {Sparc},
title = {{Semi-Supervised Apprenticeship Learning}},
url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2012semi-supervised.pdf},
volume = {24},
year = {2012}
}
@inproceedings{ng2000algorithms,
abstract = {This paper addresses the problem of inverse reinforcement learning (IRL) in Markov decision processes, that is, the problem of extracting a reward function given observed, optimal behaviour. IRL may be useful for apprenticeship learning to acquire skilled behaviour, and for ascertaining the reward function being optimized by a natural system. We rst characterize the set of all reward functions for which a given policy is optimal. We then derive three algorithms for IRL. The rst two deal with the case where the entire policy is known; we handle tabulated reward functions on a nite state space and linear functional approximation of the reward function over a potentially in- nite state space. The third algorithm deals with the more realistic case in which the policy is known only through a nite set of observed trajectories. In all cases, a key issue is degeneracythe existence of a large set of reward functions for which the observed policy is optimal. To remove...},
author = {Ng, Andrew and Russell, Stuart},
booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
editor = {{De Sousa}, Jorge Pinho},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ng, Russell - 2000 - Algorithms for inverse reinforcement learning.pdf:pdf},
pages = {663--670},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Algorithms for inverse reinforcement learning}},
url = {http://www-cs.stanford.edu/people/ang/papers/icml00-irl.pdf},
year = {2000}
}
@phdthesis{shalev-shwartz2007online,
author = {Shalev-Shwartz, Shai},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jul},
school = {The Hebrew University of Jerusalem},
title = {{Online Learning: Theory, Algorithms, and Applications}},
year = {2007}
}
@article{Auer1995,
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E},
keywords = {EXP3,adversarial bandits,bandit problem,game theory,matrix game,multi-armed bandit problem,rate of convergence,slot machines,stochastic games,well-behaved stochastic process},
mendeley-tags = {EXP3,adversarial bandits},
month = {oct},
pages = {322--331},
title = {{Gambling in a rigged casino: The adversarial multi-armed bandit problem}},
url = {http://dl.acm.org/citation.cfm?id=795662.796294},
year = {1995}
}
@article{azuma1967weighted,
author = {Azuma, Kazuoki},
journal = {Tohoku Mathematical Journal},
keywords = {bound,math},
number = {3},
pages = {357--367},
title = {{Weighted sums of certain dependent random variables}},
volume = {19},
year = {1967}
}
@inproceedings{auerlogarithmic,
author = {Auer, Peter and Ortner, Ronald},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {49--56},
title = {{Logarithmic online regret bounds for undiscounted reinforcement learning}},
booktitle = {Neural Information Processing Systems},
year = {2006}
}
@inproceedings{garivier2011kl,
author = {Garivier, Aur{\'{e}}lien and Capp{\'{e}}, Olivier},
booktitle = {Proceedings of the 24th annual Conference On Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{The {KL}-{UCB} algorithm for bounded stochastic bandits and beyond}},
year = {2011}
}
@inproceedings{cohen2016online,
author = {Cohen, Alon and Hazan, Tamir and Koren, Tomer},
booktitle = {International Conference on Machine Learning},
title = {{Online learning with feedback graphs without the graphs}},
year = {2016}
}
@phdthesis{filippi2010strategies,
author = {Filippi, Sarah},
keywords = {bandits},
mendeley-tags = {bandits},
school = {T{\'{e}}l{\'{e}}com ParisTech},
title = {{Strat{\'{e}}gies optimistes en apprentissage par renforcement}},
year = {2010}
}
@inproceedings{guillory2011online,
author = {Guillory, Andrew and Bilmes, Jeff},
booktitle = {Neural Information Processing Systems},
title = {{Online submodular set cover, ranking, and repeated active learning}},
year = {2011}
}
@inproceedings{alon2013from,
abstract = {We consider the partial observability model for multi-armed bandits, introduced by Mannor and Shamir. Our main result is a characterization of regret in the directed observability model in terms of the dominating and independence numbers of the observability graph. We also show that in the undirected case, the learner can achieve optimal regret without even accessing the observability graph before selecting an action. Both results are shown using variants of the Exp3 algorithm operating on the observability graph in a time-efficient manner.},
author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay},
booktitle = {Neural Information Processing Systems},
title = {{From bandits to experts: A tale of domination and independence}},
year = {2013}
}
@inproceedings{caron2012leveraging,
author = {Caron, St{\'{e}}phane and Kveton, Branislav and Lelarge, Marc and Bhagat, Smriti},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Leveraging side observations in stochastic bandits.}},
year = {2012}
}
@article{blum2007from,
author = {Blum, Avrim and Mansour, Yishay},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = {dec},
pages = {1307--1324},
publisher = {JMLR.org},
title = {{From External to Internal Regret}},
volume = {8},
year = {2007}
}
@article{berry1997bandit,
abstract = {We consider a bandit problem consisting of a sequence of {\$}n{\$} choices from an infinite number of Bernoulli arms, with {\$}n /rightarrow /infty{\$}. The objective is to minimize the long-run failure rate. The Bernoulli parameters are independent observations from a distribution {\$}F{\$}. We first assume {\$}F{\$} to be the uniform distribution on (0, 1) and consider various extensions. In the uniform case we show that the best lower bound for the expected failure proportion is between {\$}/sqrt2//sqrtn{\$} and {\$}2//sqrtn{\$} and we exhibit classes of strategies that achieve the latter.},
author = {Berry, Donald A. and Chen, Robert W. and Zame, Alan and Heath, David C. and Shepp, Larry A.},
journal = {Annals of Statistics},
keywords = {Bandit problems,Dynamic allocation of bernoulli processes,Sequential experimentation,Staying with a winner,Switching with a loser},
pages = {2103--2116},
title = {{Bandit problems with infinitely many arms}},
volume = {25},
year = {1997}
}
@inproceedings{kakadeefficient,
author = {Kakade, Sham M and Shalev-Shwartz, Shai and Tewari, Ambuj},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {440--447},
title = {{Efficient bandit algorithms for online multiclass prediction}},
booktitle = {International Conference on Machine Learning},
year = {2008}
}
@inproceedings{yu2011unimodal,
author = {Yu, Jia Yuan and Mannor, Shie},
booktitle = {International Conference on Machine Learning},
title = {{Unimodal bandits}},
year = {2011}
}
@article{bowling2015heads,
abstract = {Poker is a family of games that exhibit imperfect information, where players do not have full knowledge of past events. Whereas many perfect-information games have been solved (e.g., Connect Four and checkers), no nontrivial imperfect-information game played competitively by humans has previously been solved. Here, we announce that heads-up limit Texas hold'em is now essentially weakly solved. Furthermore, this computation formally proves the common wisdom that the dealer in the game holds a substantial advantage. This result was enabled by a new algorithm, CFR+, which is capable of solving extensive-form games orders of magnitude larger than previously possible. I'll see your program and raise you mine One of the fundamental differences between playing chess and two-handed poker is that the chessboard and the pieces on it are visible throughout the entire game, but an opponent's cards in poker are private. This informational deficit increases the complexity and the uncertainty in calculating the best course of action—to raise, to fold, or to call. Bowling et al. now report that they have developed a computer program that can do just that for the heads-up variant of poker known as Limit Texas Hold 'em (see the Perspective by Sandholm). Science, this issue p. 145; see also p. 122},
author = {Bowling, Michael and Burch, Neil and Johanson, Michael and Tammelin, Oskari},
journal = {Science},
number = {6218},
pages = {145--149},
title = {{Heads-up limit hold'em poker is solved}},
volume = {347},
year = {2015}
}
@inproceedings{agrawal2011analysis,
author = {Agrawal, Shipra and Goyal, Navin},
booktitle = {Conference on Learning Theory},
title = {{Analysis of Thompson sampling for the multi-armed bandit problem}},
year = {2012}
}
@incollection{chapelle2011empirical,
abstract = {Thompson sampling is one of oldest heuristic to address the exploration ex- ploitation trade-off, but it is surprisingly unpopular in the literature. We present here some empirical results using Thompson sampling on simulated and real data, and show that it is highly competitive. And since this heuristic is very easy to implement, we argue that it should be part of the standard baselines to compare against.},
author = {Chapelle, Olivier and Li, Lihong},
booktitle = {Neural Information Processing Systems},
title = {{An empirical evaluation of Thompson sampling}},
year = {2011}
}
@article{auer2002nonstochastic,
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E.},
journal = {SIAM Journal on Computing},
keywords = {bandits},
mendeley-tags = {bandits},
number = {1},
pages = {48--77},
title = {{The non-stochastic multi-armed bandit problem}},
volume = {32},
year = {2002}
}
@proceedings{raedt2005,
address = {Bonn, Germany},
booktitle = {Proceedings of the 22nd International Conference on Machine Learning},
editor = {De Raedt, Luc and Wrobel, Stefan},
isbn = {1-59593-180-5},
keywords = {bandits},
mendeley-tags = {bandits},
month = {aug},
publisher = {ACM},
series = {ICML '05, ACM International Conference Proceeding Series},
title = {{Proceedings of the 22nd International Conference on Machine Learning}},
volume = {119},
year = {2005}
}
@inproceedings{Mary2015a,
abstract = {This paper addresses the on-line recommendation problem facing new users and new items; we assume that no information is available neither about users, nor about the items. The only source of information is a set of ratings given by users to some items. By on-line, we mean that the set of users, and the set of items, and the set of ratings is evolving along time and that at any moment, the recommendation system has to select items to recommend based on the currently available information, that is basically the sequence of past events. We also mean that each user comes with her preferences which may evolve along short and longer scales of time; so we have to continuously update their preferences. When the set of ratings is the only available source of information , the traditional approach is matrix factorization. In a decision making under uncertainty setting, actions should be selected to balance exploration with exploitation; this is best modeled as a bandit problem. Matrix factors provide a latent representation of users and items. These representations may then be used as contextual information by the bandit algorithm to select items. This last point is exactly the originality of this paper: the combination of matrix factorization and bandit algorithms to solve the on-line recommendation problem. Our work is driven by considering the recommendation problem as a feedback controlled loop. This leads to interactions between the representation learning, and the recommendation policy.},
address = {Cham},
author = {Mary, J{\'{e}}r{\'{e}}mie and Gaudel, Romaric and Preux, Philippe},
booktitle = {First International Workshop on Machine Learning, Optimization, and Big Data (MOD'15)},
editor = {Pardalos, Panos and Pavone, Mario and Farinella, Giovanni Maria and Cutello, Vincenzo},
keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,contextual Bandits,sequential Recommender Systems},
language = {en},
month = {jul},
pages = {325--336},
publisher = {Springer International Publishing},
series = {Lecture Notes in Computer Science},
title = {{Bandits and Recommender Systems}},
url = {https://hal.inria.fr/hal-01256033},
volume = {9432},
year = {2015}
}
@inproceedings{flaxman2005online,
author = {Flaxman, Abraham D and Kalai, Adam Tauman and {Brendan McMahan}, Hugh},
booktitle = {Proceedings of the 16th annual ACM-SIAM Symposium On Discrete Algorithms},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {385--394},
publisher = {SIAM},
series = {SODA '05},
title = {{Online convex optimization in the bandit setting: gradient descent without a gradient}},
year = {2005}
}
@inproceedings{hazanextracting,
author = {Hazan, Elad and Kale, Satyen},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {57--68},
title = {{Extracting Certainty from Uncertainty: Regret Bounded by Variation in Costs.}},
booktitle = {Conference on Learning Theory},
year = {2008}
}
@article{cesa-bianchi2012combinatorial,
abstract = {We study sequential prediction problems in which, at each time instance, the forecaster chooses a vector from a given finite set S⊂ Rd. At the same time, the opponent chooses a loss vector in Rd and the forecaster suffers a loss that is the inner product of the two vectors. The goal of the forecaster is to achieve that, in the long run, the accumulated loss is not much larger than that of the best possible element in S. We consider the bandit setting in which the forecaster only has access to the losses of the chosen vectors (i.e., the entire loss vectors are not observed). We introduce a variant of a strategy by Dani, Hayes and Kakade achieving a regret bound that, for a variety of concrete choices of S, is of order √ndln|S| where n is the time horizon. This is not improvable in general and is better than previously known bounds. The examples we consider are all such that S⊂{\{} 0,1{\}}d, and we show how the combinatorial structure of these classes can be exploited to improve the regret bounds. We also point out computationally efficient implementations for various interesting choices of S. {\textcopyright} 2012 Elsevier Inc.},
author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor},
journal = {Journal of Computer and System Sciences},
keywords = {Adversarial bandit problems,Online linear optimization,Online prediction},
number = {5},
pages = {1404--1422},
title = {{Combinatorial bandits}},
volume = {78},
year = {2012}
}
@article{Azar2014a,
abstract = {In this paper we consider the problem of online stochastic optimization of a locally smooth function under bandit feedback. We introduce the high-confidence tree (HCT) algorithm, a novel any-time {\$}\backslashmathcal{\{}X{\}}{\$}-armed bandit algorithm, and derive regret bounds matching the performance of existing state-of-the-art in terms of dependency on number of steps and smoothness factor. The main advantage of HCT is that it handles the challenging case of correlated rewards, whereas existing methods require that the reward-generating process of each arm is an identically and independent distributed (iid) random process. HCT also improves on the state-of-the-art in terms of its memory requirement as well as requiring a weaker smoothness assumption on the mean-reward function in compare to the previous anytime algorithms. Finally, we discuss how HCT can be applied to the problem of policy search in reinforcement learning and we report preliminary empirical results.},
author = {Azar, Mohammad Gheshlaghi and Lazaric, Alessandro and Brunskill, Emma},
month = {feb},
title = {{Online Stochastic Optimization under Correlated Bandit Feedback}},
url = {http://arxiv-web3.library.cornell.edu/abs/1402.0562},
year = {2014}
}
@techreport{li2016efficient,
abstract = {Performance of machine learning algorithms depends critically on identifying a good set of hyperparameters. While current methods offer efficiencies by adaptively choosing new configurations to train, an alternative strategy is to adaptively allocate resources across the selected configurations. We formulate hyperparameter optimization as a pure-exploration non-stochastic infinitely many armed bandit problem where allocation of additional resources to an arm corresponds to training a configuration on larger subsets of the data. We introduce Hyperband for this framework and analyze its theoretical properties, providing several desirable guarantees. We compare Hyperband with state-of-the-art Bayesian optimization methods and a random search baseline on a comprehensive benchmark including 117 datasets. Our results on this benchmark demonstrate that while Bayesian optimization methods do not outperform random search trained for twice as long, Hyperband in favorable settings offers valuable speedups.},
archivePrefix = {arXiv},
arxivId = {1603.06560},
author = {Li, Lisha and Jamieson, Kevin and DeSalvo, Giulia and Rostamizadeh, Afshin and Talwalkar, Ameet},
booktitle = {arXiv preprint},
eprint = {1603.06560},
title = {{Efficient hyperparameter optimization and infinitely many armed bandits}},
year = {2016}
}
@inproceedings{korda2013thompson,
abstract = {Thompson Sampling has been demonstrated in many complex bandit models, however the theoretical guarantees available for the parametric multi-armed bandit are still limited to the Bernoulli case. Here we extend them by proving asymptotic optimality of the algorithm using the Jeffreys prior for 1-dimensional exponential family bandits. Our proof builds on previous work, but also makes extensive use of closed forms for Kullback-Leibler divergence and Fisher information (and thus Jeffreys prior) available in an exponential family. This allow us to give a finite time exponential concentration inequality for posterior distributions on exponential families that may be of interest in its own right. Moreover our analysis covers some distributions for which no optimistic algorithm has yet been proposed, including heavy-tailed exponential families.},
archivePrefix = {arXiv},
arxivId = {1307.3400},
author = {Korda, Nathaniel and Kaufmann, Emilie and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
eprint = {1307.3400},
title = {{Thompson Sampling for 1-Dimensional Exponential Family Bandits}},
year = {2013}
}
@inproceedings{cesa-bianchi2010online,
author = {Cesa-Bianchi, Nicol{\`{o}} and Shalev-Shwartz, Shai and Shamir, Ohad},
booktitle = {Conference on Learning Theory},
title = {{Online learning of noisy data with kernels}},
year = {2010}
}
@article{wang2005bandit,
author = {Wang, Chih-chun and Kulkarni, Sanjeev R and Poor, H Vincent},
journal = {IEEE Transactions on Automatic Control},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {338--355},
title = {{Bandit problems with side observations}},
volume = {50},
year = {2005}
}
@inproceedings{krause11contextual,
author = {Krause, Andreas and Ong, Cheng Soon},
booktitle = {Proceedings of Neural Information Processing Systems (NIPS)},
title = {{Contextual Gaussian Process Bandit Optimization}},
year = {2011}
}
@article{carpentier2014asimple,
author = {Carpentier, Alexandra and Valko, Michal},
journal = {ArXiv e-prints},
title = {{Simple regret for infinitely many armed bandits}},
year = {2015}
}
@inproceedings{slivkins2008adapting,
author = {Slivkins, Aleksandrs and Upfal, Eli},
booktitle = {COLT},
editor = {Servedio, Rocco A and Zhang, Tong},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {343--354},
publisher = {Omnipress},
title = {{Adapting to a Changing Environment: the Brownian Restless Bandits}},
year = {2008}
}
@incollection{russo2013eluder,
author = {Russo, Daniel and {Van Roy}, Benjamin},
booktitle = {Neural Information Processing Systems},
title = {{Eluder Dimension and the Sample Complexity of Optimistic Exploration}},
url = {http://papers.nips.cc/paper/4909-eluder-dimension-and-the-sample-complexity-of-optimistic-exploration.pdf},
year = {2013}
}
@inproceedings{fang2014networked,
author = {Fang, Meng and Tao, Dacheng},
booktitle = {International Conference on Knowledge Discovery and Data Mining},
keywords = {exploration/exploitation dilemma,networked bandits,social network},
title = {{Networked bandits with disjoint linear payoffs}},
year = {2014}
}
@inproceedings{abernethyoptimal,
author = {Abernethy, Jacob D and Bartlett, Peter L and Rakhlin, Alexander and Tewari, Ambuj},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Optimal strategies and minimax lower bounds for online convex games}},
booktitle = {Conference on Learning Theory},
year = {2008}
}
@article{Prisadnikov2014a,
author = {Prisadnikov, Nedyalko},
publisher = {ETH-Z{\"{u}}rich, Department of Computer Science},
title = {{Exploration-exploitation trade-offs via probabilistic matrix factorization}},
url = {http://e-collection.library.ethz.ch/view/eth:14399},
year = {2014}
}
@article{burnetas1997optimal,
address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
author = {Burnetas, Apostolos N and Katehakis, Micha{\"{e}}l N},
issn = {0364-765X},
journal = {Mathematics of Operations Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = {feb},
number = {1},
pages = {222--255},
publisher = {INFORMS},
title = {{Optimal adaptive policies for Markov decision processes}},
volume = {22},
year = {1997}
}
@inproceedings{ortneronline,
author = {Ortner, Ronald},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {123--137},
title = {{Online Regret Bounds for Markov Decision Processes with Deterministic Transitions}},
booktitle = {Algorithmic Learning Theory},
year = {2008}
}
@inproceedings{bnaya2013bandit,
author = {Bnaya, Zahy and Puzis, Rami and Stern, Roni and Felner, Ariel},
booktitle = {International Conference on Social Computing},
keywords = {Data mining,Educational institutions,Equations,Heuristic algorithms,Heuristics Search,Mathematical model,Multi-armed bandit,Social Network Intelligence,Social network services,Tin,VMAB,bandit algorithms,multiarmed bandit problem with volatile arms,profiles matching,query processing,search criterion,search engines,social network crawler,social network queries,social networking (online),targeted crawling},
title = {{Bandit algorithms for social network queries}},
year = {2013}
}
@inproceedings{gabillon2014largescale,
author = {Gabillon, Victor and Kveton, Branislav and Wen, Zheng and Eriksson, Brian and Muthukrishnan, S.},
booktitle = {AAAI Conference on Artificial Intelligence},
title = {{Large-scale optimistic adaptive submodularity}},
year = {2014}
}
@inproceedings{munos1999variable,
author = {Munos, R{\'{e}}mi and Moore, Andrew},
booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence},
pages = {1348--1355},
title = {{Variable Resolution Discretization for High-Accuracy Solutions of Optimal Control Problems}},
year = {1999}
}
@inproceedings{kleinberg2008multi,
author = {Kleinberg, Robert and Slivkins, Aleksandrs and Upfal, Eli},
booktitle = {Symposium on Theory Of Computing},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Multi-armed bandit problems in metric spaces}},
year = {2008}
}
@inproceedings{hazan2011beyond,
author = {Hazan, Elad and Kale, Satyen},
booktitle = {Conference on Learning Theory},
title = {{Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization}},
year = {2011}
}
@article{harisson1978,
author = {Harrison, D and Rubinfeld, D L},
journal = {Journal of Environmental Economics and Management},
pages = {81--102},
title = {{Hedonic prices and the demand for clean air}},
volume = {5},
year = {1978}
}
@proceedings{thrun2003,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 17th conference on advances in Neural Information Processing Systems},
editor = {Thrun, Sebastian and Saul, Lawrence K and Sch{\"{o}}lkopf, Bernhard},
isbn = {0-262-20152-6},
month = {dec},
publisher = {MIT Press},
series = {NIPS '03},
title = {{Proceedings of the 17th Conference on Advances in Neural Information Processing Systems}},
year = {2003}
}
@techreport{combes2015unimodal,
abstract = {We consider stochastic bandit problems with a continuous set of arms and where the expected reward is a continuous and unimodal function of the arm. No further assumption is made regarding the smoothness and the structure of the expected reward function. For these problems, we propose the Stochastic Pentachotomy (SP) algorithm, and derive finite-time upper bounds on its regret and optimization error. In particular, we show that, for any expected reward function {\$}\backslashmu{\$} that behaves as {\$}\backslashmu(x)=\backslashmu(x{\^{}}\backslashstar)-C|x-x{\^{}}\backslashstar|{\^{}}\backslashxi{\$} locally around its maximizer {\$}x{\^{}}\backslashstar{\$} for some {\$}\backslashxi, C{\textgreater}0{\$}, the SP algorithm is order-optimal. Namely its regret and optimization error scale as {\$}O(\backslashsqrt{\{}T\backslashlog(T){\}}){\$} and {\$}O(\backslashsqrt{\{}\backslashlog(T)/T{\}}){\$}, respectively, when the time horizon {\$}T{\$} grows large. These scalings are achieved without the knowledge of {\$}\backslashxi{\$} and {\$}C{\$}. Our algorithm is based on asymptotically optimal sequential statistical tests used to successively trim an interval that contains the best arm with high probability. To our knowledge, the SP algorithm constitutes the first sequential arm selection rule that achieves a regret and optimization error scaling as {\$}O(\backslashsqrt{\{}T{\}}){\$} and {\$}O(1/\backslashsqrt{\{}T{\}}){\$}, respectively, up to a logarithmic factor for non-smooth expected reward functions, as well as for smooth functions with unknown smoothness.},
archivePrefix = {arXiv},
arxivId = {1406.7447},
author = {Combes, Richard and Prouti{\`{e}}re, Alexandre},
eprint = {1406.7447},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Combes, Prouti{\`{e}}re - 2015 - Unimodal Bandits without Smoothness.pdf:pdf},
title = {{Unimodal Bandits without Smoothness}},
year = {2015}
}
@inproceedings{kocak2014efficient,
abstract = {We consider online learning problems under a a partial observability model capturing situations where the information conveyed to the learner is between full information and bandit feedback. In the simplest variant, we assume that in addition to its own loss, the learner also gets to observe losses of some other actions. The revealed losses depend on the learner's action and a directed observation system chosen by the environment. For this setting, we propose the first algorithm that enjoys near-optimal regret guarantees without having to know the observation system before selecting its actions. Along similar lines, we also define a new partial information setting that models online combinatorial optimization problems where the feedback received by the learner is between semi-bandit and full feedback. As the predictions of our first algorithm cannot be always computed efficiently in this setting, we propose another algorithm with similar properties and with the benefit of always being computationally efficient, at the price of a slightly more complicated tuning mechanism. Both algorithms rely on a novel exploration strategy called implicit exploration, which is shown to be more efficient both computationally and information-theoretically than previously studied exploration strategies for the problem.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
title = {{Efficient learning by implicit exploration in bandit problems with side observations}},
year = {2014}
}
@inproceedings{preux2014bandits,
abstract = {We consider function optimization as a sequential decision making problem under the budget constraint. Such constraint limits the number of objective function evaluations allowed during the optimization. We consider an algorithm inspired by a continuous version of a multi-armed bandit problem which attacks this optimization problem by solving the tradeoff between exploration (initial quasi-uniform search of the domain) and exploitation (local optimization around the potentially global maxima). We introduce the so-called Simultaneous Optimistic Optimization (SOO), a deterministic algorithm that works by domain partitioning. The benefit of such an approach are the guarantees on the returned solution and the numerical eficiency of the algorithm. We present this machine learning rooted approach to optimization, and provide the empirical assessment of SOO on the CEC'2014 competition on single objective real-parameter numerical optimization testsuite.},
author = {Preux, Philippe and Munos, R{\'{e}}mi and Valko, Michal},
booktitle = {Congress on Evolutionary Computation},
title = {{Bandits attack function optimization}},
year = {2014}
}
@inproceedings{bartlettadaptive,
author = {Bartlett, Peter L and Hazan, Elad and Rakhlin, Alexander},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {65--72},
title = {{Adaptive Online Gradient Descent}}
}
@article{munos2002variable,
author = {Munos, R{\'{e}}mi and Moore, Andrew},
journal = {Machine Learning},
pages = {291--323},
title = {{Variable Resolution Discretization in Optimal Control}},
volume = {49},
year = {2002}
}
@proceedings{lafferty2010,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 24th conference on advances in Neural Information Processing Systems},
editor = {Lafferty, John D and Williams, Chris K I and Shawe-Taylor, John and Zemel, Richard S and Culotta, Aron},
month = {dec},
series = {NIPS '10},
title = {{Proceedings of the 24th Conference on Advances in Neural Information Processing Systems}},
year = {2010}
}
@inproceedings{singla2015information,
abstract = {How should we gather information in a network, where each node's visibility is limited to its local neighborhood? This problem arises in numerous real-world applications, such as surveying and task routing in social networks, team formation in collaborative networks and experimental design with dependency constraints. Often the informativeness of a set of nodes can be quantified via a submodular utility function. Existing approaches for submodular optimization, however, require that the set of all nodes that can be selected is known ahead of time, which is often unrealistic. In contrast, we propose a novel model where we start our exploration from an initial node, and new nodes become visible and available for selection only once one of their neighbors has been chosen. We then present a general algorithm NetExp for this problem, and provide theoretical bounds on its performance dependent on structural properties of the underlying network. We evaluate our methodology on various simulated problem instances as well as on data collected from social question answering system deployed within a large enterprise.},
author = {Singla, Adish and Horvitz, Eric and Kohli, Pushmeet and White, Ryen and Krause, Andreas},
booktitle = {International Joint Conferences on Artificial Intelligence},
title = {{Information gathering in networks via active exploration}},
year = {2015}
}
@inproceedings{kanade2009sleeping,
author = {Kanade, Varun and McMahan, H Brendan and Bryan, Brent},
booktitle = {Proceedings of the 12th international conference on Artificial Intelligence and Statistics},
keywords = {bandits},
mendeley-tags = {bandits},
number = {5},
pages = {272--279},
series = {AI{\&}Stats '09},
title = {{Sleeping Experts and Bandits with Stochastic Action Availability and Adversarial Rewards}},
year = {2009}
}
@inproceedings{narayananrandom,
author = {Narayanan, Hariharan and Rakhlin, Alexander},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1777--1785},
title = {{Random Walk Approach to Regret Minimization}}
}
@book{neumaier2008interval,
author = {Neumaier, Arnold},
isbn = {9780521102148},
publisher = {Cambridge University Press},
series = {Encyclopedia of Mathematics and its Applications},
title = {{Interval Methods for Systems of Equations}},
url = {http://books.google.fr/books?id=ObInPwAACAAJ},
year = {2008}
}
@inproceedings{yue2012hierarchical,
address = {New York, NY, USA},
author = {Yue, Yisong and Hong, Sue A and Guestrin, Carlos},
booktitle = {Proceedings of the 29th International Conference on Machine Learning (ICML-12)},
editor = {Langford, John and Pineau, Joelle},
pages = {1895--1902},
publisher = {ACM},
title = {{Hierarchical Exploration for Accelerating Contextual Bandits}},
url = {http://icml.cc/2012/papers/933.pdf},
year = {2012}
}
@inproceedings{audibert2011minimax,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, Gabor},
booktitle = {Proceedings of the 24th annual Conference On Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{Minimax Policies for Combinatorial Prediction Games}},
year = {2011}
}
@article{hager1989updating,
author = {Hager, W W},
journal = {SIAM Review},
number = {2},
pages = {221--239},
publisher = {JSTOR},
title = {{Updating the inverse of a matrix}},
volume = {31},
year = {1989}
}
@article{thompson1933likelihood,
author = {Thompson, William R.},
journal = {Biometrika},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {285--294},
title = {{On the likelihood that one unknown probability exceeds another in view of the evidence of two samples}},
volume = {25},
year = {1933}
}
@inproceedings{Neu2015a,
author = {Neu, Gergely},
booktitle = {Advances in Neural Information Processing Systems},
pages = {3150--3158},
title = {{Explore no more: Improved high-probability regret bounds for non-stochastic bandits}},
url = {http://machinelearning.wustl.edu/mlpapers/papers/NIPS2015{\_}5732},
year = {2015}
}
@unpublished{honda2010asymptotically,
note = {arXiv:0905.2776},
author = {Honda, Junya and Takemura, Akimichi},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{An Asymptotically Optimal Policy for Finite Support Models in the Multiarmed Bandit Problem}},
year = {2010}
}
@article{Gopalan2013a,
abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.},
author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay},
month = {nov},
title = {{Thompson Sampling for Complex Bandit Problems}},
url = {http://arxiv.org/abs/1311.0466},
year = {2013}
}
@proceedings{gavalda2009,
address = {Porto, Portugal},
booktitle = {Proceedings of the 20th international conference on Algorithmic Learning Theory},
editor = {Gavald{\`{a}}, Ricard and Lugosi, G{\'{a}}bor and Zeugmann, Thomas and Zilles, Sandra},
isbn = {978-3-642-04413-7},
keywords = {bandits},
mendeley-tags = {bandits},
month = {oct},
publisher = {Springer},
series = {ALT '09, Lecture Notes in Computer Science},
title = {{Proceedings of the 20th International Conference on Algorithmic Learning Theory}},
volume = {5809},
year = {2009}
}
@inproceedings{dani2008price,
author = {Dani, Varsha and Hayes, Thomas P and Kakade, Sham M},
booktitle = {Neural Information Processing Systems},
issn = {00368075},
keywords = {bandits},
mendeley-tags = {bandits},
publisher = {MIT Press},
title = {{The Price of Bandit Information for Online Optimization}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.71.4607{\&}rep=rep1{\&}type=pdf},
year = {2008}
}
@proceedings{bshouty2007,
address = {San Diego, CA, USA},
booktitle = {Proceedings of the 20th annual Conference On Learning Theory},
editor = {Bshouty, Nader H and Gentile, Claudio},
isbn = {978-3-540-72925-9},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {Springer},
series = {COLT '07, Lecture Notes in Computer Science},
title = {{Proceedings of the 20th Annual Conference On Learning Theory}},
volume = {4539},
year = {2007}
}
@proceedings{servedio2008,
address = {Helsinki, Finland},
booktitle = {Proceedings of the 21st annual Conference On Learning Theory},
editor = {Servedio, Rocco A and Zhang, Tong},
month = {jul},
publisher = {Omnipress},
series = {COLT '08},
title = {{Proceedings of the 21st Annual Conference On Learning Theory}},
volume = {80},
year = {2008}
}
@article{gai2012combinatorial,
abstract = {In the classic multi-armed bandits problem, the goal is to have a policy for dynamically operating arms that each yield stochastic rewards with unknown means. The key metric of interest is regret, defined as the gap between the expected total reward accumulated by an omniscient player that knows the reward means for each arm, and the expected total reward accumulated by the given policy. The policies presented in prior work have storage, computation and regret all growing linearly with the number of arms, which is not scalable when the number of arms is large. We consider in this work a broad class of multi-armed bandits with dependent arms that yield rewards as a linear combination of a set of unknown parameters. For this general framework, we present efficient policies that are shown to achieve regret that grows logarithmically with time, and polynomially in the number of unknown parameters (even though the number of dependent arms may grow exponentially). Furthermore, these policies only require storage that grows linearly in the number of unknown parameters. We show that this generalization is broadly applicable and useful for many interesting tasks in networks that can be formulated as tractable combinatorial optimization problems with linear objective functions, such as maximum weight matching, shortest path, and minimum spanning tree computations.},
author = {Gai, Yi and Krishnamachari, Bhaskar and Jain, Rahul},
journal = {IEEE/ACM Transactions on Networking},
keywords = {Combinatorial network optimization,multi-armed bandits (MABs),online learning},
number = {5},
pages = {1466--1478},
title = {{Combinatorial network optimization with unknown variables: Multi-armed bandits with linear rewards and individual observations}},
volume = {20},
year = {2012}
}
@proceedings{cohen2006,
address = {Pittsburgh, Pennsylvania, USA},
booktitle = {Proceedings of the 23rd International Conference on Machine Learning},
editor = {Cohen, William W and Moore, Andrew},
isbn = {1-59593-383-2},
month = {jun},
publisher = {ACM},
series = {ICML '06, ACM International Conference Proceeding Series},
title = {{Proceedings of the 23rd International Conference on Machine Learning}},
volume = {148},
year = {2006}
}
@inproceedings{kaufmann2012thompson,
abstract = {The question of the optimality of Thompson Sampling for solving the stochastic multi-armed bandit problem had been open since 1933. In this paper we answer it positively for the case of Bernoulli rewards by providing the first finite-time analysis that matches the asymptotic rate given in the Lai and Robbins lower bound for the cumulative regret. The proof is accompanied by a numerical comparison with other optimal policies, experiments that have been lacking in the literature until now for the Bernoulli case.},
archivePrefix = {arXiv},
arxivId = {1205.4217},
author = {Kaufmann, Emilie and Korda, Nathaniel and Munos, R{\'{e}}mi},
eprint = {1205.4217},
booktitle = {Algorithmic Learning Theory},
title = {{Thompson Sampling: An Asymptotically Optimal Finite Time Analysis}},
url = {http://link.springer.com/chapter/10.1007/978-3-642-34106-9{\_}18{\%}5Cnhttp://arxiv.org/abs/1205.4217},
year = {2012}
}
@inproceedings{billsus2000learning,
author = {Billsus, Daniel and Pazzani, Michael J. and Chen, James},
booktitle = {International Conference on Intelligent User Interfaces},
title = {{A learning agent for wireless news access}},
year = {2000}
}
@inproceedings{grunewalder2010regret,
author = {Gr{\"{u}}new{\"{a}}lder, Steffen and Audibert, Jean-Yves and Opper, Manfred and Shawe-Taylor, John},
booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Regret Bounds for Gaussian Process Bandit Problems}},
year = {2010}
}
@inproceedings{gabillon2013adaptive,
author = {Gabillon, Victor and Kveton, Branislav and Wen, Zheng and Eriksson, Brian and Muthukrishnan, S.},
booktitle = {Neural Information Processing Systems},
title = {{Adaptive submodular maximization in bandit setting}},
year = {2013}
}
@article{auer2002finite,
address = {Hingham, MA, USA},
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Fischer, Paul},
journal = {Machine Learning},
keywords = {adaptive allocation rules,bandit problems,bandits,finite horizon regret},
mendeley-tags = {bandits},
number = {2--3},
pages = {235--256},
publisher = {Kluwer Academic Publishers},
title = {{Finite-time analysis of the multiarmed bandit problem}},
volume = {47},
year = {2002}
}
@inproceedings{audibert2010best,
abstract = {We consider the problem of finding the best arm in a stochastic multi-armed bandit game. The regret of a forecaster is here defined by the gap between the mean reward of the optimal arm and the mean reward of the ultimately chosen arm. We propose a highly exploring UCB policy and a new algorithm based on successive rejects. We show that these algorithms are essentially optimal since their regret decreases exponentially at a rate which is, up to a logarithmic factor, the best possible. However, while the UCB policy needs the tuning of a parameter depending on the unobservable hardness of the task, the successive rejects policy benefits from being parameter-free, and also independent of the scaling of the rewards.},
annote = {From Duplicate 3 ( Best arm identification in multi-armed bandits - Audibert, Jean-Yves; Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi )
},
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi},
booktitle = {Conference on Learning Theory},
keywords = {learning,statistics {\&} optimisation,theory {\&} algorithms},
title = {{Best arm identification in multi-armed bandits}},
year = {2010}
}
@proceedings{lugosi2006,
address = {Pittsburgh, PA, USA},
booktitle = {Proceedings of the 19th annual Conference On Learning Theory},
editor = {Lugosi, G{\'{a}}bor and Simon, Hans-Ulrich},
isbn = {3-540-35294-5},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {Springer},
series = {COLT '06, Lecture Notes in Computer Science},
title = {{Proceedings of the 19th Annual Conference On Learning Theory}},
volume = {4005},
year = {2006}
}
@article{Cesa-Bianchi2016a,
abstract = {We study networks of communicating learning agents that cooperate to solve a common nonstochastic bandit problem. Agents use an underlying communication network to get messages about actions selected by other agents, and drop messages that took more than {\$}d{\$} hops to arrive, where {\$}d{\$} is a delay parameter. We introduce $\backslash$textsc{\{}Exp3-Coop{\}}, a cooperative version of the {\{}$\backslash$sc Exp3{\}} algorithm and prove that with {\$}K{\$} actions and {\$}N{\$} agents the average per-agent regret after {\$}T{\$} rounds is at most of order {\$}\backslashsqrt{\{}\backslashbigl(d+1 + \backslashtfrac{\{}K{\}}{\{}N{\}}\backslashalpha{\_}{\{}\backslashle d{\}}\backslashbigr)(T\backslashln K){\}}{\$}, where {\$}\backslashalpha{\_}{\{}\backslashle d{\}}{\$} is the independence number of the {\$}d{\$}-th power of the connected communication graph {\$}G{\$}. We then show that for any connected graph, for {\$}d=\backslashsqrt{\{}K{\}}{\$} the regret bound is {\$}K{\^{}}{\{}1/4{\}}\backslashsqrt{\{}T{\}}{\$}, strictly better than the minimax regret {\$}\backslashsqrt{\{}KT{\}}{\$} for noncooperating agents. More informed choices of {\$}d{\$} lead to bounds which are arbitrarily close to the full information minimax regret {\$}\backslashsqrt{\{}T\backslashln K{\}}{\$} when {\$}G{\$} is dense. When {\$}G{\$} has sparse components, we show that a variant of $\backslash$textsc{\{}Exp3-Coop{\}}, allowing agents to choose their parameters according to their centrality in {\$}G{\$}, strictly improves the regret. Finally, as a by-product of our analysis, we provide the first characterization of the minimax regret for bandit learning with delay.},
author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay and Minora, Alberto},
month = {feb},
pages = {27},
title = {{Delay and Cooperation in Nonstochastic Bandits}},
url = {http://arxiv.org/abs/1602.04741},
year = {2016}
}
@proceedings{shawe-taylor2004,
address = {Banff, Canada},
booktitle = {Proceedings of the 17th annual Conference On Learning Theory},
editor = {Shawe-Taylor, John and Singer, Yoram},
isbn = {3-540-22282-0},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jul},
publisher = {Springer},
series = {COLT '04, Lecture Notes in Computer Science},
title = {{Proceedings of the 17th Annual Conference On Learning Theory}},
volume = {3120},
year = {2004}
}
@techreport{lehrer2003wide,
author = {Lehrer, Ehud and Rosenberg, Dinah},
institution = {EconWPA},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{A Wide Range No-Regret Theorem}},
type = {Game Theory and Information},
year = {2003}
}
@inproceedings{li2010contextual,
abstract = {Personalized web services strive to adapt their services (advertisements, news articles, etc) to individual users by making use of both content and user information. Despite a few recent advances, this problem remains challenging for at least two reasons. First, web service is featured with dynamically changing pools of content, rendering traditional collaborative filtering methods inapplicable. Second, the scale of most web services of practical interest calls for solutions that are both fast in learning and computation. In this work, we model personalized recommendation of news articles as a contextual bandit problem, a principled approach in which a learning algorithm sequentially selects articles to serve users based on contextual information about the users and articles, while simultaneously adapting its article-selection strategy based on user-click feedback to maximize total user clicks. The contributions of this work are three-fold. First, we propose a new, general contextual bandit algorithm that is computationally efficient and well motivated from learning theory. Second, we argue that any bandit algorithm can be reliably evaluated offline using previously recorded random traffic. Finally, using this offline evaluation method, we successfully applied our new algorithm to a Yahoo! Front Page Today Module dataset containing over 33 million events. Results showed a 12.5{\%} click lift compared to a standard context-free bandit algorithm, and the advantage becomes even greater when data gets more scarce.},
author = {Li, Lihong and Chu, Wei and Langford, John and Schapire, Robert E.},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Li et al. - 2010 - A Contextual-Bandit Approach to Personalized News Article Recommendation.0146:0146},
institution = {ACM},
booktitle = {International World Wide Web Conference},
keywords = {contextual bandit,exploitation dilemma,exploration,personalization,recommender sys,tems,web service},
publisher = {ACM Press},
title = {{A contextual-bandit approach to personalized news article recommendation}},
year = {2010}
}
@article{Kolla2016,
abstract = {We consider a collaborative online learning paradigm, wherein a group of agents connected through a social network are engaged in playing a stochastic multi-armed bandit game. Each time an agent takes an action, the corresponding reward is instantaneously observed by the agent, as well as its neighbours in the social network. We perform a regret analysis of various policies in this collaborative learning setting. A key finding of this paper is that natural extensions of widely-studied single agent learning policies to the network setting need not perform well in terms of regret. In particular, we identify a class of non-altruistic and individually consistent policies, and argue by deriving regret lower bounds that they are liable to suffer a large regret in the networked setting. We also show that the learning performance can be substantially improved if the agents exploit the structure of the network, and develop a simple learning algorithm based on dominating sets of the network. Specifically, we first consider a star network, which is a common motif in hierarchical social networks, and show analytically that the hub agent can be used as an information sink to expedite learning and improve the overall regret. We also derive networkwide regret bounds for the algorithm applied to general networks. We conduct numerical experiments on a variety of networks to corroborate our analytical results.},
author = {Kolla, Ravi Kumar and Jagannathan, Krishna and Gopalan, Aditya},
month = {feb},
pages = {14},
title = {{Collaborative Learning of Stochastic Bandits over a Social Network}},
url = {http://arxiv.org/abs/1602.08886},
year = {2016}
}
@inproceedings{bartletthigh,
author = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas P and Kakade, Sham M and Rakhlin, Alexander and Tewari, Ambuj},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {335--342},
title = {{High-probability Regret Bounds for Bandit Online Linear Optimization}}
}
@article{lai1985asymptotically,
author = {Lai, Tze L and Robbins, Herbert},
journal = {Advances in Applied Mathematics},
keywords = {bandit,bandits},
mendeley-tags = {bandits},
number = {1},
pages = {4--22},
publisher = {Elsevier},
title = {{Asymptotically efficient adaptive allocation rules}},
volume = {6},
year = {1985}
}
@inproceedings{bartlett2009regal,
address = {Arlington, Virginia, United States},
author = {Bartlett, Peter L and Tewari, Ambuj},
booktitle = {Proceedings of the 25th conference on Uncertainty in Artificial Intelligence},
isbn = {978-0-9749039-5-8},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {35--42},
publisher = {AUAI Press},
series = {UAI '09},
title = {{REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs}},
year = {2009}
}
@article{bnaya2013social,
abstract = {In many cases the best way to find a profile or a set of profiles matching some criteria in a socialnetwork is via targeted crawling. An important challenge in targeted crawling is choosing the next profileto explore. Existing heuristics for targeted crawling are usually tailored for specific search criterionand could lead to short-sighted crawling decisions. In this paper we propose and evaluate a generic ap- proach for guiding targeted crawling which is based on recent developments in Artificial Intelligence. Ourapproach, based on the recently introduced variant of the Multi-Armed Bandit problem with volatile arms(VMAB), aims to provide a proper balance between exploration and exploitation during the crawling process. Unlike other heuristics which are hand tailored for specific type of search queries, our approach isgeneral-purpose. In addition, it provides provable performance guarantees. Experimental results indicate that our approach compares favorably with the best existing heuristics on two different domains.},
author = {Bnaya, Zahy and Puzis, Rami and Stern, Roni and Felner, Ariel},
journal = {Human Journal},
number = {2},
pages = {84--98},
title = {{Social network search as a volatile multi-armed bandit problem}},
volume = {2},
year = {2013}
}
@inproceedings{carpentier2014extreme,
abstract = {In many areas of medicine, security, and life sciences, we want to allocate limited resources to different sources in order to detect extreme values. In this paper, we study an efficient way to allocate these resources sequentially under limited feedback. While sequential design of experiments is well studied in bandit theory, the most commonly optimized property is the regret with respect to the maximum mean reward. However, in other problems such as network intrusion detection, we are interested in detecting the most extreme value output by the sources. Therefore, in our work we study extreme regret which measures the efficiency of an algorithm compared to the oracle policy selecting the source with the heaviest tail. We propose the ExtremeHunter algorithm, provide its analysis, and evaluate it empirically on synthetic and real-world experiments.},
author = {Carpentier, Alexandra and Valko, Michal},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2014 - Extreme bandits.pdf:pdf},
title = {{Extreme bandits}},
year = {2014}
}
@inproceedings{gentile2014online,
author = {Gentile, Claudio and Li, Shuai and Zappella, Giovanni},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Gentile, Li, Zappella - 2014 - Online Clustering of Bandits.pdf:pdf},
title = {{Online clustering of bandits}},
year = {2014}
}
@article{Bao2016a,
abstract = {Social networks have been popular platforms for information propagation. An important use case is viral marketing: given a promotion budget, an advertiser can choose some influential users as the seed set and provide them free or discounted sample products; in this way, the advertiser hopes to increase the popularity of the product in the users' friend circles by the world-of-mouth effect, and thus maximizes the number of users that information of the production can reach. There has been a body of literature studying the influence maximization problem. Nevertheless, the existing studies mostly investigate the problem on a one-off basis, assuming fixed known influence probabilities among users, or the knowledge of the exact social network topology. In practice, the social network topology and the influence probabilities are typically unknown to the advertiser, which can be varying over time, i.e., in cases of newly established, strengthened or weakened social ties. In this paper, we focus on a dynamic non-stationary social network and design a randomized algorithm, RSB, based on multi-armed bandit optimization, to maximize influence propagation over time. The algorithm produces a sequence of online decisions and calibrates its explore-exploit strategy utilizing outcomes of previous decisions. It is rigorously proven to achieve an upper-bounded regret in reward and applicable to large-scale social networks. Practical effectiveness of the algorithm is evaluated using both synthetic and real-world datasets, which demonstrates that our algorithm outperforms previous stationary methods under non-stationary conditions.},
author = {Bao, Yixin and Wang, Xiaoke and Wang, Zhi and Wu, Chuan and Lau, Francis C. M.},
month = {apr},
pages = {10},
title = {{Online Influence Maximization in Non-Stationary Social Networks}},
url = {http://arxiv.org/abs/1604.07638},
year = {2016}
}
@article{whittle1980multi,
author = {Whittle, Peter},
issn = {00359246},
journal = {Journal of the Royal Statistical Society. Series B (Methodological)},
keywords = {bandits},
mendeley-tags = {bandits},
number = {2},
pages = {143--149},
publisher = {Blackwell Publishing for the Royal Statistical Society},
title = {{Multi-Armed Bandits and the Gittins Index}},
volume = {42},
year = {1980}
}
@inproceedings{hren2008optimistic,
abstract = {If one possesses a model of a controlled deterministic system, then from any state, one may consider the set of all possible reachable states starting from that state and using any sequence of actions. This forms a tree whose size is exponential in the planning time horizon. Here we ask the question: given finite computational resources (e.g. CPU time), which may not be known ahead of time, what is the best way to explore this tree, such that once all resources have been used, the algorithm would be able to propose an action (or a sequence of actions) whose performance is as close as possible to optimality? The performance with respect to optimality is assessed in terms of the regret (with respect to the sum of discounted future rewards) resulting from choosing the action returned by the algorithm instead of an optimal action. In this paper we investigate an optimistic exploration of the tree, where the most promising states are explored first, and compare this approach to a naive uniform exploration. Bounds on the regret are derived both for uniform and optimistic exploration strategies. Numerical simulations illustrate the benefit of optimistic planning.},
author = {Hren, Jean-Fran{\c{c}}ois and Munos, R{\'{e}}mi},
booktitle = {European Workshop on Reinforcement Learning},
title = {{Optimistic Planning of Deterministic Systems}},
year = {2008}
}
@article{cortez2009,
author = {Cortez, Paulo and Cerdeira, Ant{\'{o}}nio and Almeida, Fernando and Matos, Telmo and Reis, Jos{\'{e}}},
journal = {Decision Support Systems},
pages = {547--553},
publisher = {Elsevier},
title = {{Modeling wine preferences by data mining from physicochemical properties}},
volume = {47},
year = {2009}
}
@phdthesis{stoltz2005incomplete,
address = {Orsay, France},
author = {Stoltz, Gilles},
keywords = {bandits},
mendeley-tags = {bandits},
month = {may},
school = {Universit{\'{e}} Paris-Sud},
title = {{Incomplete Information and Internal Regret in Prediction of Individual Sequences}},
type = {PhD thesis},
year = {2005}
}
@inproceedings{hondaasymptotically,
author = {Honda, Junya and Takemura, Akimichi},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {67--79},
title = {{An Asymptotically Optimal Bandit Algorithm for Bounded Support Models}},
booktitle = {Conference on Learning Theory},
year = {2010}
}
@inproceedings{bonald2013two-target,
author = {Bonald, Thomas and Prouti{\`{e}}re, Alexandre},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Bonald, Proutiere - 2013 - Two-Target Algorithms for Infinite-Armed Bandits with Bernoulli Rewards.pdf:pdf},
title = {{Two-target algorithms for infinite-armed bandits with Bernoulli rewards}},
year = {2013}
}
@article{audibert2010regret,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = {dec},
pages = {2785--2836},
publisher = {JMLR.org},
title = {{Regret Bounds and Minimax Policies under Partial Monitoring}},
volume = {11},
year = {2010}
}
@inproceedings{garivier2016maximin,
abstract = {We study an original problem of pure exploration in a strategic bandit model motivated by Monte Carlo Tree Search. It consists in identifying the best action in a game, when the player may sample random outcomes of sequentially chosen pairs of actions. We propose two strategies for the fixed-confidence setting: Maximin-LUCB, based on lower-and upper-confidence bounds; and Maximin-Racing, which operates by successively eliminating the sub-optimal actions. We discuss the sample complexity of both methods and compare their performance empirically. We sketch a lower bound analysis, and possible connections to an optimal algorithm.},
archivePrefix = {arXiv},
arxivId = {arXiv:1602.04676v1},
author = {Garivier, Aur{\'{e}}lien and Kaufmann, Emilie and Koolen, Wouter M},
booktitle = {Conference on Learning Theory},
eprint = {arXiv:1602.04676v1},
title = {{Maximin action identification: A new bandit framework for games}},
year = {2016}
}
@inproceedings{munos2011optimistic,
abstract = {We consider a global optimization problem of a deterministic function f in a semi-metric space, given a finite budget of n evaluations. The function f is assumed to be locally smooth (around one of its global maxima) with respect to a semi-metric. We describe two algorithms based on optimistic exploration that use a hierarchical partitioning of the space at all scales. A first contribution is an algorithm, DOO, that requires the knowledge of . We report a finite-sample performance bound in terms of a measure of the quantity of near-optimal states. We then define a second algorithm, SOO, which does not require the knowledge of the semi-metric under which f is smooth, and whose performance is almost as good as DOO optimally-fitted.},
author = {Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
title = {{Optimistic optimization of deterministic functions without the knowledge of its smoothness}},
year = {2011}
}
@article{jaksch2010near,
address = {Cambridge, MA, USA},
author = {Jaksch, Thomas and Ortner, Ronald and Auer, Peter},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = {aug},
pages = {1563--1600},
publisher = {MIT Press},
title = {{Near-optimal Regret Bounds for Reinforcement Learning}},
volume = {99},
year = {2010}
}
@article{auer2002using,
author = {Auer, Peter},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {397--422},
title = {{Using confidence bounds for exploitation-exploration trade-offs}},
volume = {3},
year = {2002}
}
@article{Agrawal2012,
abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state-of-the-art methods. However, many questions regarding its theoretical performance remained open. In this paper, we design and analyze a generalization of Thompson Sampling algorithm for the stochastic contextual multi-armed bandit problem with linear payoff functions, when the contexts are provided by an adaptive adversary. This is among the most important and widely studied versions of the contextual bandits problem. We provide the first theoretical guarantees for the contextual version of Thompson Sampling. We prove a high probability regret bound of {\$}\backslashtilde{\{}O{\}}(d{\^{}}{\{}3/2{\}}\backslashsqrt{\{}T{\}}){\$} (or {\$}\backslashtilde{\{}O{\}}(d\backslashsqrt{\{}T \backslashlog(N){\}}){\$}), which is the best regret bound achieved by any computationally efficient algorithm available for this problem in the current literature, and is within a factor of {\$}\backslashsqrt{\{}d{\}}{\$} (or {\$}\backslashsqrt{\{}\backslashlog(N){\}}{\$}) of the information-theoretic lower bound for this problem.},
author = {Agrawal, Shipra and Goyal, Navin},
month = {sep},
title = {{Thompson Sampling for Contextual Bandits with Linear Payoffs}},
url = {http://arxiv.org/abs/1209.3352},
year = {2012}
}
@inproceedings{balcazar2006,
address = {Barcelona, Spain},
booktitle = {Proceedings of the 17th international conference on Algorithmic Learning Theory},
editor = {Balc{\'{a}}zar, Jos{\'{e}} L and Long, Philip M and Stephan, Frank},
isbn = {3-540-46649-5},
keywords = {bandits},
mendeley-tags = {bandits},
month = {oct},
publisher = {Springer},
series = {ALT '06, Lecture Notes in Computer Science},
title = {{Algorithmic Learning Theory}},
volume = {4264},
year = {2006}
}
@inproceedings{azar2014online,
abstract = {In this paper we consider the problem of online stochastic optimization of a locally smooth function under bandit feedback. We introduce the high-confidence tree (HCT) algorithm, a novel any-time {\$}\backslashmathcal{\{}X{\}}{\$}-armed bandit algorithm, and derive regret bounds matching the performance of existing state-of-the-art in terms of dependency on number of steps and smoothness factor. The main advantage of HCT is that it handles the challenging case of correlated rewards, whereas existing methods require that the reward-generating process of each arm is an identically and independent distributed (iid) random process. HCT also improves on the state-of-the-art in terms of its memory requirement as well as requiring a weaker smoothness assumption on the mean-reward function in compare to the previous anytime algorithms. Finally, we discuss how HCT can be applied to the problem of policy search in reinforcement learning and we report preliminary empirical results.},
author = {Azar, Mohammad Gheshlaghi and Lazaric, Alessandro and Brunskill, Emma},
booktitle = {International Conference on Machine Learning},
title = {{Online Stochastic Optimization under Correlated Bandit Feedback}},
year = {2014}
}
@inproceedings{lazarichybrid,
author = {Lazaric, Alessandro and Munos, R{\'{e}}mi},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Hybrid Stochastic-Adversarial Online Learning}},
booktitle = {Conference on Learning Theory},
year = {2009}
}
@inproceedings{abernethy2008efficient,
abstract = {We introduce an efficient algorithm for the problem of online linear optimization in the bandit setting which achieves the optimal {$O^*(\sqrt{T})$} regret. The setting is a natural generalization of the non-stochastic multi-armed bandit problem, and the existence of an efficient optimal algorithm has been posed as an open problem in a number of recent papers. We show how the difficulties encountered by previous approaches are overcome by the use of a self-concordant potential function. Our approach presents a novel connection between online learning and interior point methods.},
author = {Abernethy, Jacob Duncan and Hazan, Elad and Rakhlin, Alexander},
booktitle = {Conference on Learning Theory},
institution = {EECS Department, University of California, Berkeley},
title = {{An Efficient Algorithm for Bandit Linear Optimization}},
year = {2008}
}
@article{beygelzimer2010contextual,
abstract = {We address the problem of learning in an online, bandit setting where the learner must repeatedly select among K actions, but only receives partial feedback based on its choices. We establish two new facts: First, using a new algorithm called Exp4.P, we show that it is possible to compete with the best in a set of N experts with probability {\$}1-delta while incurring regret at most O(sqrtKTln(N/delta)) over T time steps. The new algorithm is tested empirically in a large-scale, real-world dataset. Second, we give a new algorithm called VE that competes with a possibly infinite set of policies of VC-dimension d while incurring regret at most O(sqrtT(dln(T) + ln (1/delta))) with probability {\$}1-delta. These guarantees improve on those of all previous algorithms, whether in a stochastic or adversarial environment, and bring us closer to providing supervised learning type guarantees for the contextual bandit setting.},
author = {Beygelzimer, Alina and Langford, John and Li, Lihong and Reyzin, Lev and Schapire, Robert E},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Beygelzimer et al. - 2010 - Contextual Bandit Algorithms with Supervised Learning Guarantees.pdf:pdf},
journal = {Machine Learning},
pages = {14},
title = {{Contextual Bandit Algorithms with Supervised Learning Guarantees}},
url = {http://arxiv.org/abs/1002.4058},
volume = {15},
year = {2010}
}
@inproceedings{bengio2009,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 23rd conference on advances in Neural Information Processing Systems},
editor = {Bengio, Yoshua and Schuurmans, Dale and Lafferty, John D and Williams, Chris K I and Culotta, Aron},
month = {dec},
series = {NIPS '09},
title = {{Advances in Neural Information Processing Systems}},
year = {2009}
}
@article{bubeck2011x,
abstract = {We consider a generalization of stochastic bandits where the set of arms, cX, is allowed to be a generic measurable space and the mean-payoff function is "locally Lipschitz" with respect to a dissimilarity function that is known to the decision maker. Under this condition we construct an arm selection policy, called HOO (hierarchical optimistic optimization), with improved regret bounds compared to previous results for a large class of problems. In particular, our results imply that if cX is the unit hypercube in a Euclidean space and the mean-payoff function has a finite number of global maxima around which the behavior of the function is locally continuous with a known smoothness degree, then the expected regret of HOO is bounded up to a logarithmic factor by sqrtn, i.e., the rate of growth of the regret is independent of the dimension of the space. We also prove the minimax optimality of our algorithm when the dissimilarity is a metric. Our basic strategy has quadratic computational complexity as a function of the number of time steps and does not rely on the doubling trick. We also introduce a modified strategy, which relies on the doubling trick but runs in linearithmic time. Both results are improvements with respect to previous approaches.},
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles and Szepesv{\'{a}}ri, Csaba},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Bubeck et al. - 2011 - X-armed bandits.pdf:pdf},
journal = {Journal of Machine Learning Research},
pages = {1587--1627},
title = {{X-armed bandits}},
volume = {12},
year = {2011}
}
@inproceedings{hutter2007,
address = {Sendai, Japan},
booktitle = {Proceedings of the 18th international conference on Algorithmic Learning Theory},
editor = {Hutter, Marcus and Servedio, Rocco A and Takimoto, Eiji},
isbn = {978-3-540-75224-0},
keywords = {bandits},
mendeley-tags = {bandits},
month = {oct},
publisher = {Springer},
series = {ALT '07, Lecture Notes in Computer Science},
title = {{Algorithmic Learning Theory}},
volume = {4754},
year = {2007}
}
@inproceedings{audibert2009minimax,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien},
booktitle = {Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Minimax policies for adversarial and stochastic bandits}},
year = {2009}
}
@inproceedings{bubeck2008online,
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles and Szepesv{\'{a}}ri, Csaba},
booktitle = {Advances in Neural Information Processing Systems},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {201--208},
title = {{Online Optimization of X-armed Bandits}},
year = {2008}
}
@inproceedings{dani2008stochastic,
author = {Dani, Varsha and Hayes, Thomas P and Kakade, Sham M},
booktitle = {Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Stochastic linear optimization under bandit feedback}},
year = {2008}
}
@phdthesis{maillard2011apprentissage,
author = {Maillard, Odalric-Ambrym},
keywords = {bandits},
mendeley-tags = {bandits},
school = {Universit{\'{e}} des Sciences et des Technologies de Lille 1},
title = {{Apprentissage s{\'{e}}quentiel: Bandits, Statistique et Renforcement}},
year = {2011}
}
@misc{pandora2013,
author = {Pandora},
howpublished = {http://www.pandora.com},
title = {{Internet Radio}},
url = {http://www.pandora.com},
year = {2013}
}
@book{gittins1989multi,
author = {Gittins, John C and Weber, Richard and Glazebrook, Kevin},
keywords = {bandits},
mendeley-tags = {bandits},
publisher = {Wiley},
title = {{Multi-armed Bandit Allocation Indices}},
year = {1989}
}
@inproceedings{yue2009k,
author = {Yue, Yisong and Broder, J and Kleinberg, R and Joachims, T},
booktitle = {Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{The K-armed Dueling Bandits Problem}},
year = {2009}
}
@inproceedings{auer2007improved,
author = {Auer, Peter and Ortner, Ronald and Szepesv{\'{a}}ri, Csaba},
booktitle = {Proceedings of the 20th Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Improved rates for the stochastic continuum-armed bandit problem}},
year = {2007}
}
@incollection{langford2008epoch,
address = {Cambridge, MA},
author = {Langford, John and Zhang, Tong},
booktitle = {Advances in Neural Information Processing Systems 20},
editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {817--824},
publisher = {MIT Press},
title = {{The Epoch-Greedy Algorithm for Multi-armed Bandits with Side Information}},
year = {2008}
}
@article{bull2015adaptive,
author = {Bull, Adam D.},
journal = {Bernoulli},
keywords = {bandits on taxonomies,continuum-armed bandits,noisy global optimisation,tree-armed bandits,zooming dimension},
number = {4},
pages = {2289--2307},
title = {{Adaptive-treed bandits}},
volume = {21},
year = {2015}
}
@article{Contal2016a,
abstract = {The paper considers the problem of global optimization in the setup of stochastic process bandits. We introduce an UCB algorithm which builds a cascade of discretization trees based on generic chaining in order to render possible his operability over a continuous domain. The theoretical framework applies to functions under weak probabilistic smoothness assumptions and also extends significantly the spectrum of application of UCB strategies. Moreover generic regret bounds are derived which are then specialized to Gaussian processes indexed on infinite-dimensional spaces as well as to quadratic forms of Gaussian processes. Lower bounds are also proved in the case of Gaussian processes to assess the optimality of the proposed algorithm.},
author = {Contal, Emile and Vayatis, Nicolas},
month = {feb},
title = {{Stochastic Process Bandits: Upper Confidence Bounds Algorithms via Generic Chaining}},
url = {http://arxiv.org/abs/1602.04976},
year = {2016}
}
@inproceedings{kleinbergnearly,
author = {Kleinberg, Robert D},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Nearly Tight Bounds for the Continuum-Armed Bandit Problem}},
booktitle = {Neural Information Processing Systems},
year = {2004}
}
@inproceedings{wang2008algorithms,
author = {Wang, Yizao and Audibert, Jean-Yves and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Algorithms for infinitely many-armed bandits}},
year = {2008}
}
@article{bubeck2012regret,
abstract = {Multi-armed bandit problems are the most basic examples of sequential decision problems with an exploration-exploitation trade-off. This is the balance between staying with the option that gave highest payoffs in the past and exploring new options that might give higher payoffs in the future. Although the study of bandit problems dates back to the Thirties, exploration-exploitation trade-offs arise in several modern applications, such as ad placement, website optimization, and packet routing. Mathematically, a multi-armed bandit is defined by the payoff process associated with each option. In this survey, we focus on two extreme cases in which the analysis of regret is particularly simple and elegant: i.i.d. payoffs and adversarial payoffs. Besides the basic setting of finitely many actions, we also analyze some of the most important variants and extensions, such as the contextual bandit model.},
archivePrefix = {arXiv},
arxivId = {1204.5721},
author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}}},
eprint = {1204.5721},
journal = {Foundations and Trends in Machine Learning},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1--122},
title = {{Regret Analysis of Stochastic and Nonstochastic Multi-armed Bandit Problems}},
url = {http://arxiv.org/abs/1204.5721},
volume = {5},
year = {2012}
}
@inproceedings{chu2011contextual,
author = {Chu, Wei and Li, Lihong and Reyzin, Lev and Schapire, Robert E},
booktitle = {International Conference on Artificial Intelligence and Statistics},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Chu et al. - 2011 - Contextual Bandits with Linear Payoff Functions.pdf:pdf},
title = {{Contextual bandits with linear payoff functions}},
year = {2011}
}
@inproceedings{kalai2010,
booktitle = {Proceedings of the 23rd annual Conference On Learning Theory},
editor = {Kalai, Adam Tauman and Mohri, Mehryar},
isbn = {978-0-9822529-2-5},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {Omnipress},
title = {{Conference on Learning Theory}},
year = {2010}
}
@inproceedings{kveton2015tight,
author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesv{\'{a}}ri, Csaba},
booktitle = {International Conference on Artificial Intelligence and Statistics},
title = {{Tight regret bounds for stochastic combinatorial semi-bandits}},
year = {2015}
}
@article{Tu2015a,
author = {Tu, Shi-tao and Zhu, Lan-juan},
issn = {1007-1172},
journal = {Journal of Shanghai Jiaotong University (Science)},
month = {may},
number = {5},
pages = {535--539},
title = {{A bandit method using probabilistic matrix factorization in recommendation}},
url = {http://link.springer.com/10.1007/s12204-015-1618-7},
volume = {20},
year = {2015}
}
@article{hazan2007logarithmic,
author = {Hazan, Elad and Agarwal, Amit and Kale, Satyen},
journal = {Machine Learning},
keywords = {bandits},
mendeley-tags = {bandits},
number = {2-3},
pages = {169--192},
title = {{Logarithmic Regret Algorithms for Online Convex Optimization}},
volume = {69},
year = {2007}
}
@inproceedings{bartok2011minimax,
abstract = {In a partial monitoring game, the learner repeatedly chooses an action, the environment responds with an outcome, and then the learner suffers a loss and receives a feedback signal, both of which are fixed functions of the action and the outcome. The goal of the learner is to minimize his regret, which is the difference between his total cumulative loss and the total loss of the best fixed action in hindsight. Assuming that the outcomes are generated in an i.i.d. fashion from an arbitrary and unknown probability distribution, we characterize the minimax regret of any partial monitoring game with finitely many actions and outcomes. It turns out that the minimax regret of any such game is either zero, {$\tilde{\Theta}(\sqrt{T})$}, {$\Theta(T^{2/3})$}, or {$\Theta(T)$}. We provide a computationally efficient learning algorithm that achieves the minimax regret within logarithmic factor for any game. {\textcopyright} 2011 G. Bart{\'{o}}k, D. P{\'{a}}l {\&} C. Szepesv{\'{a}}ri.},
author = {Bart{\'{o}}k, G{\'{a}}bor and P{\'{a}}l, D{\'{a}}vid and Szepesv{\'{a}}ri, Csaba},
booktitle = {Conference on Learning Theory},
keywords = {Imperfect feedback,Online learning,Regret analysis},
title = {{Minimax regret of finite partial-monitoring games in stochastic environments}},
year = {2011}
}
@article{rusmevichientong2010linearly,
address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
author = {Rusmevichientong, Paat and Tsitsiklis, John N},
journal = {Math. Oper. Res.},
keywords = {bandits},
mendeley-tags = {bandits},
month = {may},
number = {2},
pages = {395--411},
publisher = {Informs},
title = {{Linearly Parameterized Bandits}},
volume = {35},
year = {2010}
}
@inproceedings{desautels12parallelizing,
author = {Desautels, Thomas and Krause, Andreas and Burdick, Joel},
booktitle = {International Conference on Machine Learning},
title = {{Parallelizing exploration-exploitation tradeoffs in Gaussian process bandit optimization}},
year = {2012}
}
@article{burnetas1996optimal,
author = {Burnetas, Apostolos N. and Katehakis, Micha{\"{e}}l N.},
journal = {Advances in Applied Mathematics},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {122--142},
title = {{Optimal adaptive policies for sequential allocation problems}},
number = {2},
volume = {17},
year = {1996}
}
@inproceedings{zinkevich2003online,
author = {Zinkevich, Martin},
booktitle = {Proceedings of the 20th International Conference on Machine Learning},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {928--936},
title = {{Online Convex Programming and Generalized Infinitesimal Gradient Ascent}},
year = {2003}
}
@inproceedings{dudik2011efficient,
abstract = {We address the problem of learning in an online setting where the learner repeatedly observes features, selects among a set of actions, and receives reward for the action taken. We provide the first efficient algorithm with an optimal regret. Our algorithm uses a cost sensitive classification learner as an oracle and has a running time {$\mathrm{polylog}(N)$}, where N is the number of classification rules among which the oracle might choose. This is exponentially faster than all previous algorithms that achieve optimal regret in this setting. Our formulation also enables us to create an algorithm with regret that is additive rather than multiplicative in feedback delay as in all previous work.},
author = {Dudik, Miroslav and Hsu, Daniel and Kale, Satyen and Karampatziakis, Nikos and Langford, John and Reyzin, Lev and Zhang, Tong},
booktitle = {Conference on Uncertainty in Artificial Intelligence},
title = {{Efficient Optimal Learning for Contextual Bandits}},
url = {http://arxiv.org/abs/1106.2369},
year = {2011}
}
@inproceedings{neu2013efficient,
author = {Neu, Gergely and Bart{\'{o}}k, G{\'{a}}bor},
booktitle = {Algorithmic Learning Theory},
title = {{An efficient algorithm for learning with semi-bandit feedback}},
year = {2013}
}
@inproceedings{danyluk2009,
address = {Montreal, Quebec, Canada},
booktitle = {Proceedings of the 26th International Conference on Machine Learning},
editor = {Danyluk, Andrea Pohoreckyj and Bottou, L{\'{e}}on and Littman, Michael L},
isbn = {978-1-60558-516-1},
month = {jun},
publisher = {ACM},
series = {ICML '09, ACM International Conference Proceeding Series},
title = {{International Conference on Machine Learning}},
volume = {382},
year = {2009}
}
@book{cesa-bianchi2006prediction,
author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor},
keywords = {bandits},
mendeley-tags = {bandits},
publisher = {Cambridge University Press},
title = {{Prediction, learning, and games}},
year = {2006}
}
@inproceedings{chakrabartimortal,
author = {Chakrabarti, Deepayan and Kumar, Ravi and Radlinski, Filip and Upfal, Eli},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {273--280},
title = {{Mortal Multi-Armed Bandits}},
booktitle = {Neural Information Processing Systems},
year = {2008}
}
@inproceedings{bubeck2012towards,
author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Kakade, Sham M.},
booktitle = {Conference on Learning Theory},
title = {{Towards minimax policies for online linear optimization with bandit feedback}},
year = {2012}
}
@inproceedings{pandey2007multi,
address = {New York, NY, USA},
author = {Pandey, Sandeep and Chakrabarti, Deepayan and Agarwal, Deepak},
booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {721--728},
publisher = {ACM},
title = {{Multi-Armed Bandit Problems with Dependent Arms}},
year = {2007}
}
@inproceedings{koller2008,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 22nd conference on advances in Neural Information Processing Systems},
editor = {Koller, Daphne and Schuurmans, Dale and Bengio, Yoshua and Bottou, L{\'{e}}on},
month = {dec},
publisher = {MIT Press},
series = {NIPS '08},
title = {{Advances in Neural Information Processing Systems}},
year = {2008}
}
@phdthesis{stoltz2011contributions,
author = {Stoltz, Gilles},
keywords = {bandits},
mendeley-tags = {bandits},
school = {Universit{\'{e}} Paris-Sud},
title = {{Contributions to the sequential prediction of arbitrary sequences: applications to the theory of repeated games and empirical studies of the performance of the aggregation of experts}},
type = {Habilitation {\`{a}} Diriger des Recherches},
year = {2011}
}
@article{mannor2004sample,
author = {Mannor, Shie and Tsitsiklis, John N},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {623--648},
title = {{The Sample Complexity of Exploration in the Multi-Armed Bandit Problem}},
volume = {5},
year = {2004}
}
@inproceedings{freund2008,
address = {Budapest, Hungary},
booktitle = {Proceedings of the 19th international conference on Algorithmic Learning Theory},
editor = {Freund, Yoav and Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Tur{\'{a}}n, Gy{\"{o}}rgy and Zeugmann, Thomas},
isbn = {978-3-540-87986-2},
keywords = {bandits},
mendeley-tags = {bandits},
month = {oct},
publisher = {Springer},
series = {ALT '08, Lecture Notes in Computer Science},
title = {{Algorithmic Learning Theory}},
volume = {5254},
year = {2008}
}
@inproceedings{mannor2011from,
author = {Mannor, Shie and Shamir, Ohad},
booktitle = {Neural Information Processing Systems},
title = {{From bandits to experts: On the value of side-observations}},
year = {2011}
}
@inproceedings{brodley2004,
address = {Banff, Alberta, Canada},
booktitle = {Proceedings of the 21st International Conference on Machine Learning},
editor = {Brodley, Carla E},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jul},
publisher = {ACM},
series = {ICML '04, ACM International Conference Proceeding Series},
title = {{International Conference on Machine Learning}},
volume = {69},
year = {2004}
}
@inproceedings{vaswani2015influence,
abstract = {Most work on influence maximization assumes network influence probabilities are given. The few papers that propose algorithms for learning these probabilities assume the availability of a batch of diffusion cascades and learn the probabilities offline. We tackle the real but difficult problems of (i) learning influence probabilities and (ii) maximizing influence spread, when no cascades are available as input, by adopting a combinatorial multi-armed bandit (CMAB) paradigm. We formulate the above problems respectively as network exploration, i.e., minimizing the error in learned influence probabilities, and minimization of loss in spread from choosing suboptimal seed sets over the rounds of a CMAB game. We propose algorithms for both problems and establish bounds on their performance. Finally, we demonstrate the effectiveness and usefulness of the proposed algorithms via a comprehensive set of experiments over three real datasets.},
author = {Vaswani, Sharan and Lakshmanan, Laks V. S. and Schmidt, Mark},
booktitle = {NIPS workshop on Networks in the Social and Information Sciences 2015},
title = {{Influence maximization with bandits}},
year = {2015}
}
@article{bubeck2013bandits,
author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor},
doi = {10.1109/TIT.2013.2277869},
issn = {0018-9448},
journal = {IEEE Transactions on Information Theory},
keywords = {Electronic mail,Equations,Heavy-tailed distributions,Indexes,Probability distribution,Random variables,Robustness,Standards,regret bounds,robust estimators,stochastic multi-armed bandit},
number = {11},
pages = {7711--7717},
title = {{Bandits With Heavy Tail}},
volume = {59},
year = {2013}
}
@inproceedings{auer1995gambling,
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert E},
booktitle = {Proceedings of the 36th Annual Symposium on Foundations of Computer Science},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {322--331},
title = {{Gambling in a Rigged Casino: The Adversarial Multi-Armed Bandit problem}},
year = {1995}
}
@misc{Guillou2015a,
abstract = {Recommender Systems (RS) aim at suggesting to users one or several items in which they might have interest. Following the feedback they receive from the user, these systems have to adapt their model in order to improve future recommendations. The repetition of these steps defines the RS as a sequential process. This sequential aspect raises an exploration-exploitation dilemma, which is surprisingly rarely taken into account for RS without contextual information. In this paper we present an explore-exploit collaborative filtering RS, based on Matrix Factor-ization and Bandits algorithms. Using experiments on artificial and real datasets, we show the importance and practicability of using sequential approaches to perform recommendation. We also study the impact of the model update on both the quality and the computation time of the recommendation procedure.},
author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe},
booktitle = {NIPS'15 Workshop: Machine Learning for eCommerce},
keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,sequential Recommender Systems},
language = {en},
month = {dec},
title = {{Collaborative Filtering as a Multi-Armed Bandit}},
url = {https://hal.inria.fr/hal-01256254},
year = {2015}
}
@inproceedings{cohen2008,
address = {Helsinki, Finland},
booktitle = {Proceedings of the 25th International Conference on Machine Learning},
editor = {Cohen, William W and McCallum, Andrew and Roweis, Sam T},
isbn = {978-1-60558-205-4},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {ACM},
series = {ICML '08, ACM International Conference Proceeding Series},
title = {{No Title}},
volume = {307},
year = {2008}
}
@misc{snapnets,
author = {Leskovec, Jure and Krevl, Andrej},
howpublished = {\url{http://snap.stanford.edu/data}},
month = {jun},
title = {{SNAP Datasets: Stanford Large Network Dataset Collection}},
year = {2014}
}
@inproceedings{ben-david2009agnostic,
author = {Ben-David, Shai and P{\'{a}}l, D{\'{a}}vid and Shalev-Shwartz, Shai},
booktitle = {22nd Annual Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Agnostic Online Learning}},
year = {2009}
}
@inproceedings{wu2015online,
author = {Wu, Yifan and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba},
booktitle = {Neural Information Processing Systems},
title = {{Online learning with Gaussian payoffs and side observations}},
year = {2015}
}
@article{poland2008nonstochastic,
author = {Poland, Jan},
journal = {Theoretical Computer Science},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jul},
number = {1-3},
pages = {77--93},
title = {{Nonstochastic bandits: Countable decision set, unbounded costs and reactive environments}},
volume = {397},
year = {2008}
}
@article{Lei2015a,
abstract = {Social networks are commonly used for marketing purposes. For example, free samples of a product can be given to a few influential social network users (or "seed nodes"), with the hope that they will convince their friends to buy it. One way to formalize marketers' objective is through influence maximization (or IM), whose goal is to find the best seed nodes to activate under a fixed budget, so that the number of people who get influenced in the end is maximized. Recent solutions to IM rely on the influence probability that a user influences another one. However, this probability information may be unavailable or incomplete. In this paper, we study IM in the absence of complete information on influence probability. We call this problem Online Influence Maximization (OIM) since we learn influence probabilities at the same time we run influence campaigns. To solve OIM, we propose a multiple-trial approach, where (1) some seed nodes are selected based on existing influence information; (2) an influence campaign is started with these seed nodes; and (3) users' feedback is used to update influence information. We adopt the Explore-Exploit strategy, which can select seed nodes using either the current influence probability estimation (exploit), or the confidence bound on the estimation (explore). Any existing IM algorithm can be used in this framework. We also develop an incremental algorithm that can significantly reduce the overhead of handling users' feedback information. Our experiments show that our solution is more effective than traditional IM methods on the partial information.},
author = {Lei, Siyu and Maniu, Silviu and Mo, Luyi and Cheng, Reynold and Senellart, Pierre},
month = {jun},
pages = {13},
title = {{Online Influence Maximization (Extended Version)}},
url = {http://arxiv.org/abs/1506.01188},
year = {2015}
}
@inproceedings{littlestone1989weighted,
address = {Washington, DC, USA},
author = {Littlestone, Nick and Warmuth, Manfred K},
booktitle = {Proceedings of the 30th annual Symposium on Foundations of Computer Science},
isbn = {0-8186-1982-1},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {256--261},
publisher = {IEEE Computer Society},
title = {{The weighted majority algorithm}},
year = {1989}
}
@inproceedings{auer2005,
address = {Bertinoro, Italy},
booktitle = {Proceedings of the 18th annual Conference On Learning Theory},
editor = {Auer, Peter and Meir, Ron},
isbn = {3-540-26556-2},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {Springer},
series = {COLT '05, Lecture Notes in Computer Science},
title = {{No Title}},
volume = {3559},
year = {2005}
}
@article{silver2016mastering,
abstract = {The game of Go has long been viewed as the most challenging of classic games for artificial intelligence owing to its enormous search space and the difficulty of evaluating board positions and moves. Here we introduce a new approach to computer Go that uses `value networks' to evaluate board positions and `policy networks' to select moves. These deep neural networks are trained by a novel combination of supervised learning from human expert games, and reinforcement learning from games of self-play. Without any lookahead search, the neural networks play Go at the level of state-of-the-art Monte Carlo tree search programs that simulate thousands of random games of self-play. We also introduce a new search algorithm that combines Monte Carlo simulation with value and policy networks. Using this search algorithm, our program AlphaGo achieved a 99.8{\%} winning rate against other Go programs, and defeated the human European Go champion by 5 games to 0. This is the first time that a computer program has defeated a human professional player in the full-sized game of Go, a feat previously thought to be at least a decade away.},
author = {Silver, David and Huang, Aja and Maddison, Chris J. and Guez, Arthur and Sifre, Laurent and van den Driessche, George and Schrittwieser, Julian and Antonoglou, Ioannis and Panneershelvam, Veda and Lanctot, Marc and Dieleman, Sander and Grewe, Dominik and Nham, John and Kalchbrenner, Nal and Sutskever, Ilya and Lillicrap, Timothy and Leach, Madeleine and Kavukcuoglu, Koray and Graepel, Thore and Hassabis, Demis},
journal = {Nature},
number = {7587},
pages = {484--489},
shorttitle = {Nature},
title = {{Mastering the game of Go with deep neural networks and tree search}},
volume = {529},
year = {2016}
}
@inproceedings{streeter2006asymptotically,
abstract = {We present an asymptotically optimal algorithm for the max variant of the k-armed bandit problem. Given a set of k slot machines, each yielding payoff from a fixed (but unknown) distribution, we wish to allocate trials to the machines so as to maximize the expected maximum payoff received over a series of n trials. Subject to certain distributional assumptions, we show that $O\left(\frac{\ln(1/\delta)}{\epsilon^{2}} \ln(n)^{2}\right)$ trials are sufficient to identify, with probability at least $1 - \delta$, a machine whose expected maximum payoff is within $\epsilon$ of optimal. This result leads to a strategy for solving the problem that is asymptotically optimal in the following sense: the gap between the expected maximum payoff obtained by using our strategy for n trials and that obtained by pulling the single best arm for all n trials approaches zero as $n \rightarrow \infty$.},
author = {Streeter, Matthew J. and Smith, Stephen F.},
booktitle = {AAAI Conference on Artificial Intelligence},
doi = {10.1.1.91.4735},
keywords = {constraint satisfaction,satisfiability},
pages = {135--142},
title = {{An Asymptotically Optimal Algorithm for the Max k-Armed Bandit Problem}},
year = {2006}
}
@article{cesa-bianchi2005minimizing,
abstract = {We investigate label efficient prediction, a variant, proposed by Helmbold and Panizza, of the problem of prediction with expert advice. In this variant, the forecaster, after guessing the next element of the sequence to be predicted, does not observe its true value unless he asks for it, which he cannot do too often. We determine matching upper and lower bounds for the best possible excess prediction error, with respect to the best possible constant predictor, when the number of allowed queries is fixed. We also prove that Hannan consistency, a fundamental property in game-theoretic prediction models, can be achieved by a forecaster issuing a number of queries growing to infinity at a rate just slightly faster than logarithmic in the number of prediction rounds.},
author = {Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor and Stoltz, Gilles},
journal = {IEEE Transactions on Information Theory},
keywords = {Individual sequences,Label efficient prediction,On-line learning,Prediction with expert advice},
number = {6},
pages = {2152--2162},
title = {{Minimizing regret with label efficient prediction}},
volume = {51},
year = {2005}
}
@article{auer2010ucb,
abstract = {In the stochastic multi-armed bandit problem we consider a modification of the UCB algorithm of Auer et al. [4]. For this modified algorithm we give an improved bound on the regret with respect to the optimal reward. While for the original UCB algorithm the regret in K-armed bandits after T trials is bounded by const $K \log(T)/\Delta$, where $\Delta$ measures the distance between a suboptimal arm and the optimal arm, for the modified UCB algorithm we show an upper bound on the regret of const $K \log(T\Delta^{2})/\Delta$.},
author = {Auer, Peter and Ortner, Ronald},
journal = {Periodica Mathematica Hungarica},
keywords = {computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms},
title = {{UCB revisited: Improved regret bounds for the stochastic multi-armed bandit problem}},
year = {2010}
}
@incollection{seldin2011pac,
author = {Seldin, Yevgeny and Auer, Peter and Laviolette, Fran{\c{c}}ois and Shawe-Taylor, John S and Ortner, Ronald},
booktitle = {Neural Information Processing Systems (NIPS)},
pages = {1683--1691},
title = {{PAC-Bayesian Analysis of Contextual Bandits}},
year = {2011}
}
@inproceedings{pandey2007bandits,
author = {Pandey, S and Agarwal, D and Chakrabarti, D and Josifovski, V},
booktitle = {Proceedings of the Seventh SIAM International Conference on Data Mining},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Bandits for Taxonomies: A Model-based Approach}},
year = {2007}
}
@article{Gopalan2013b,
abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.},
author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay},
month = {nov},
title = {{Thompson Sampling for Complex Bandit Problems}},
url = {http://arxiv.org/abs/1311.0466},
year = {2013}
}
@article{awerbuch2008online,
address = {Orlando, FL, USA},
author = {Awerbuch, Baruch and Kleinberg, Robert D},
issn = {0022-0000},
journal = {Journal of Computer and System Sciences},
keywords = {bandits},
mendeley-tags = {bandits},
month = {feb},
number = {1},
pages = {97--114},
publisher = {Academic Press, Inc.},
title = {{Online linear optimization and adaptive routing}},
volume = {74},
year = {2008}
}
@article{guha2007approximation,
author = {Guha, Sudipto and Munagala, Kamesh and Shi, Peng},
journal = {CoRR},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Approximation Algorithms for Restless Bandit Problems}},
volume = {abs/0711.3},
year = {2007}
}
@phdthesis{bubeck2010bandits,
author = {Bubeck, S{\'{e}}bastien},
keywords = {bandits},
mendeley-tags = {bandits},
school = {Universit{\'{e}} de Lille 1},
title = {{Bandits Games and Clustering Foundations}},
year = {2010}
}
@article{bartok2014partial,
abstract = {In a partial monitoring game, the learner repeatedly chooses an action, the environment responds with an outcome, and then the learner suffers a loss and receives a feedback signal, both of which are fixed functions of the action and the outcome. The goal of the learner is to minimize his regret, which is the difference between his total cumulative loss and the total loss of the best fixed action in hindsight. In this paper we characterize the minimax regret of any partial monitoring game with finitely many actions and outcomes. It turns out that the minimax regret of any such game is either zero, $\Theta(\sqrt{T})$, $\Theta(T^{2/3})$, or $\Theta(T)$. We provide computationally efficient learning algorithms that achieve the minimax regret within logarithmic factor for any game. In addition to the bounds on the minimax regret, if we assume that the outcomes are generated in an i.i.d. fashion, we prove individual upper bounds on the expected regret.},
author = {Bart{\'{o}}k, G{\'{a}}bor and Foster, Dean P. and P{\'{a}}l, D{\'{a}}vid and Rakhlin, Alexander and Szepesv{\'{a}}ri, Csaba},
journal = {Mathematics of Operations Research},
number = {4},
pages = {967--997},
title = {{Partial monitoring-classification, regret bounds, and algorithms}},
volume = {39},
year = {2014}
}
@inproceedings{yue2011linear,
author = {Yue, Yisong and Guestrin, Carlos},
booktitle = {Neural Information Processing Systems},
title = {{Linear submodular bandits and their application to diversified retrieval}},
year = {2011}
}
@article{Asadi2016a,
abstract = {A softmax operator applied to a set of values acts somewhat like the maximization function and somewhat like an average. In sequential decision making, softmax is often used in settings where it is necessary to maximize utility but also to hedge against problems that arise from putting all of one's weight behind a single maximum utility decision. The Boltzmann softmax operator is the most commonly used softmax operator in this setting, but we show that this operator is prone to misbehavior. In this work, we study an alternative softmax operator that, among other properties, is both a non-expansion (ensuring convergent behavior in learning and planning) and differentiable (making it possible to improve decisions via gradient descent methods). We provide proofs of these properties and present empirical comparisons between various softmax operators.},
author = {Asadi, Kavosh and Littman, Michael L.},
month = {dec},
title = {{A New Softmax Operator for Reinforcement Learning}},
url = {http://arxiv.org/abs/1612.05628},
year = {2016}
}
@inproceedings{coquelin2007bandit,
author = {Coquelin, Pierre-Arnaud and Munos, R{\'{e}}mi},
booktitle = {Uncertainty in Artificial Intelligence},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Bandit algorithms for tree search}},
year = {2007}
}
@inproceedings{neu2014online,
abstract = {Most work on sequential learning assumes a fixed set of actions that are available all the time. However, in practice, actions can consist of picking subsets of readings from sensors that may break from time to time, road segments that can be blocked or goods that are out of stock. In this paper we study learning algorithms that are able to deal with stochastic availability of such unreliable composite actions. We propose and analyze algorithms based on the Follow-The-Perturbed-Leader prediction method for several learning settings differing in the feedback provided to the learner. Our algorithms rely on a novel loss estimation technique that we call Counting Asleep Times. We deliver regret bounds for our algorithms for the previously studied full information and (semi-)bandit settings, as well as a natural middle point between the two that we call the restricted information setting. A special consequence of our results is a significant improvement of the best known performance guarantees achieved by an efficient algorithm for the sleeping bandit problem with stochastic availability. Finally, we evaluate our algorithms empirically and show their improvement over the known approaches.},
author = {Neu, Gergely and Valko, Michal},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Neu, Valko - 2014 - Online combinatorial optimization with stochastic decision sets and adversarial losses.pdf:pdf},
title = {{Online combinatorial optimization with stochastic decision sets and adversarial losses}},
year = {2014}
}
@inproceedings{slivkins2011multi-armed,
author = {Slivkins, Aleksandrs},
booktitle = {Neural Information Processing Systems},
title = {{Multi-armed bandits on implicit metric spaces}},
year = {2011}
}
@inproceedings{agrawal2013thomson,
author = {Agrawal, Shipra and Goyal, Navin},
booktitle = {International Conference on Machine Learning},
title = {{Thompson sampling for contextual bandits with linear payoffs}},
year = {2013}
}
@inproceedings{kleinbergregret,
author = {Kleinberg, Robert D and Niculescu-Mizil, Alexandru and Sharma, Yogeshwer},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {425--436},
title = {{Regret Bounds for Sleeping Experts and Bandits}}
}
@article{bubeck2009pure,
abstract = {We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of strategies that perform an online exploration of the arms. The strategies are assessed in terms of their simple regret, a regret notion that captures the fact that exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation needs to be performed at the same time. We believe that this performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We discuss the links between the simple and the cumulative regret. The main result is that the required explorationexploitation trade-offs are qualitatively different, in view of a general lower bound on the simple regret in terms of the cumulative regret.},
annote = {From Duplicate 1 ( Pure exploration in multi-armed bandits problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles )
From Duplicate 1 ( Pure exploration in multi-armed bandits problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles )
From Duplicate 4 ( Pure Exploration in Multi-armed Bandits Problems - Bubeck, S{\'{e}}bastien; Munos, R{\'{e}}mi; Stoltz, Gilles )
},
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles},
journal = {Algorithmic Learning Theory},
keywords = {bandits,computational,information theoretic learning with statistics,theory {\&} algorithms},
mendeley-tags = {bandits},
pages = {23--37},
publisher = {Springer-Verlag},
title = {{Pure Exploration in Multi-armed Bandits Problems}},
year = {2009}
}
@article{slivkins2009contextual,
abstract = {In a multi-armed bandit (MAB) problem, an online algorithm makes a sequence of choices. In each round it chooses from a time-invariant set of alternatives and receives the payoff associated with this alternative. While the case of small strategy sets is by now well-understood, a lot of recent work has focused on MAB problems with exponentially or infinitely large strategy sets, where one needs to assume extra structure in order to make the problem tractable. In particular, recent literature considered information on similarity between arms. We consider similarity information in the setting of "contextual bandits", a natural extension of the basic MAB problem where before each round an algorithm is given the "context" - a hint about the payoffs in this round. Contextual bandits are directly motivated by placing advertisements on webpages, one of the crucial problems in sponsored search. A particularly simple way to represent similarity information in the contextual bandit setting is via a "similarity distance" between the context-arm pairs which gives an upper bound on the difference between the respective expected payoffs. Prior work on contextual bandits with similarity uses "uniform" partitions of the similarity space, which is potentially wasteful. We design more efficient algorithms that are based on adaptive partitions adjusted to "popular" context and "high-payoff" arms.},
annote = {From Duplicate 2 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs )
And Duplicate 4 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs )
And Duplicate 5 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs )
From Duplicate 1 ( Contextual Bandits with Similarity Information - Slivkins, Aleksandrs )
},
author = {Slivkins, Aleksandrs},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Slivkins - 2009 - Contextual Bandits with Similarity Information.pdf:pdf},
journal = {Conference on Learning Theory},
keywords = {6,7 have been obtained,a postdoc brown,a preliminary version,a write up has,bandits,been circulated 2007,been posted arxiv,colt 2011,contextual bandits,full,has,metric spaces,multi armed bandits,online learning,org july 2009,regret minimization,results section,university,version a paper,which does not include,while author},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{Contextual bandits with similarity information}},
year = {2009}
}
@inproceedings{ghosh2015ising,
author = {Ghosh, Shaona and Pr{\"{u}}gel-Bennett, Adam},
booktitle = {European Conference on Machine Learning},
title = {{Ising bandits with side information}},
year = {2015}
}
@inproceedings{maillard2011adaptive,
author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi},
booktitle = {Proceedings of the 14th International Conference on Artificial Intelligence and Statistics},
keywords = {bandits},
mendeley-tags = {bandits},
series = {JMLR W{\&}CP},
title = {{Adaptive bandits: Towards the best history-dependent strategy}},
volume = {15},
year = {2011}
}
@article{bubeck2011pure,
abstract = {We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of forecasters that perform an on-line exploration of the arms. These forecasters are assessed in terms of their simple regret, a regret notion that captures the fact that exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation needs to be performed at the same time. We believe that this performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We discuss the links between the simple and the cumulative regret. One of the main results in the case of a finite number of arms is a general lower bound on the simple regret of a forecaster in terms of its cumulative regret: the smaller the latter, the larger the former. Keeping this result in mind, we then exhibit upper bounds on the simple regret of some forecasters. The paper ends with a study devoted to continuous-armed bandit problems; we show that the simple regret can be minimized with respect to a family of probability distributions if and only if the cumulative regret can be minimized for it. Based on this equivalence, we are able to prove that the separable metric spaces are exactly the metric spaces on which these regrets can be minimized with respect to the family of all probability distributions with continuous mean-payoff functions. {\copyright} 2010 Elsevier B.V. All rights reserved.},
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles},
journal = {Theoretical Computer Science},
keywords = {Continuous-armed bandits,Efficient exploration,Multi-armed bandits,Simple regret},
number = {19},
pages = {1832--1852},
title = {{Pure exploration in finitely-armed and continuous-armed bandits}},
volume = {412},
year = {2011}
}
@article{Alon2014a,
abstract = {We present and study a partial-information model of online learning, where a decision maker repeatedly chooses from a finite set of actions, and observes some subset of the associated losses. This naturally models several situations where the losses of different actions are related, and knowing the loss of one action provides information on the loss of other actions. Moreover, it generalizes and interpolates between the well studied full-information setting (where all losses are revealed) and the bandit setting (where only the loss of the action chosen by the player is revealed). We provide several algorithms addressing different variants of our setting, and provide tight regret bounds depending on combinatorial properties of the information feedback structure.},
author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mannor, Shie and Mansour, Yishay and Shamir, Ohad},
month = {sep},
title = {{Nonstochastic Multi-Armed Bandits with Graph-Structured Feedback}},
url = {http://arxiv.org/abs/1409.8428},
year = {2014}
}
@inproceedings{maillard2011finite,
author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi and Stoltz, Gilles},
booktitle = {Proceedings of the 24th Annual Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{Finite-Time Analysis of Multi-armed Bandits Problems with Kullback-Leibler Divergences}},
year = {2011}
}
@inproceedings{ma2015active,
author = {Ma, Yifei and Huang, Tzu-Kuo and Schneider, Jeff},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Active search and bandits on graphs using sigma-optimality}},
year = {2015}
}
@inproceedings{furnkranz2010,
address = {Haifa, Israel},
booktitle = {Proceedings of the 27th International Conference on Machine Learning},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {Omnipress},
series = {ICML '10},
title = {{No Title}},
year = {2010}
}
@inproceedings{alon2015online,
abstract = {We study a general class of online learning problems where the feedback is specified by a graph. This class includes online prediction with expert advice and the multi-armed bandit problem, but also several learning problems where the online player does not necessarily observe his own loss. We analyze how the structure of the feedback graph controls the inherent difficulty of the induced $T$-round learning problem. Specifically, we show that any feedback graph belongs to one of three classes: strongly observable graphs, weakly observable graphs, and unobservable graphs. We prove that the first class induces learning problems with $\widetilde{\Theta}(\alpha^{1/2} T^{1/2})$ minimax regret, where $\alpha$ is the independence number of the underlying graph; the second class induces problems with $\widetilde{\Theta}(\delta^{1/3}T^{2/3})$ minimax regret, where $\delta$ is the domination number of a certain portion of the graph; and the third class induces problems with linear minimax regret. Our results subsume much of the previous work on learning with feedback graphs and reveal new connections to partial monitoring games. We also show how the regret is affected if the graphs are allowed to vary with time.},
author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Dekel, Ofer and Koren, Tomer},
booktitle = {Conference on Learning Theory},
title = {{Online learning with feedback graphs: Beyond bandits}},
year = {2015}
}
@article{russo2014learning,
author = {Russo, Daniel and {Van Roy}, Benjamin},
journal = {Mathematics of Operations Research},
title = {{Learning to Optimize Via Posterior Sampling}},
year = {2014}
}
@book{Shawe-Taylor2004,
author = {Shawe-Taylor, John and Cristianini, Nello},
publisher = {Cambridge University Press},
title = {{Kernel Methods for Pattern Analysis}},
year = {2004}
}
@phdthesis{bubeck2010jeux,
author = {Bubeck, S{\'{e}}bastien},
keywords = {bandits},
mendeley-tags = {bandits},
school = {Universit{\'{e}} des Sciences et des Technologies de Lille 1},
title = {{Jeux de bandits et fondations du clustering}},
year = {2010}
}
@article{nino-nora2010computing,
author = {Ni{\~{n}}o-Mora, Jos{\'{e}}},
doi = {10.1287/ijoc.1100.0398},
issn = {10919856},
journal = {INFORMS Journal on Computing},
keywords = {accepted may 2010,accepted winfried grassmann,advance,analysis algorithms,area editor computational,bandits,computational complexity,dynamic programming,finite horizon,history,index policies,march 2009,markov,may 2010,probability analysis,published online articles,received,revised january 2010},
number = {2},
pages = {254--267},
title = {{Computing a Classic Index for Finite-Horizon Bandits}},
url = {http://joc.journal.informs.org/cgi/doi/10.1287/ijoc.1100.0398},
volume = {23},
year = {2010}
}
@inproceedings{buccapatnam2014stochastic,
author = {Buccapatnam, Swapna and Eryilmaz, Atilla and Shroff, Ness B.},
booktitle = {International Conference on Measurement and Modeling of Computer Systems},
title = {{Stochastic bandits with side observations on networks}},
year = {2014}
}
@inproceedings{srinivas2009gaussian,
abstract = {Many applications require optimizing an unknown, noisy function that is expensive to evaluate. We formalize this task as a multi-armed bandit problem, where the payoff function is either sampled from a Gaussian process (GP) or has low RKHS norm. We resolve the important open problem of deriving regret bounds for this setting, which imply novel convergence rates for GP optimization. We analyze GP-UCB, an intuitive upper-confidence based algorithm, and bound its cumulative regret in terms of maximal information gain, establishing a novel connection between GP optimization and experimental design. Moreover, by bounding the latter in terms of operator spectra, we obtain explicit sublinear regret bounds for many commonly used covariance functions. In some important cases, our bounds have surprisingly weak dependence on the dimensionality. In our experiments on real sensor data, GP-UCB compares favorably with other heuristical GP optimization approaches.},
author = {Srinivas, Niranjan and Krause, Andreas and Kakade, Sham M. and Seeger, Matthias},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Srinivas et al. - 2010 - Gaussian Process Optimization in the Bandit Setting No Regret and Experimental Design.3995:3995},
title = {{Gaussian process optimization in the bandit setting: No regret and experimental design}},
year = {2010}
}
@article{may2012optimistic,
author = {May, Benedict C. and Korda, Nathaniel and Lee, Anthony and Leslie, David S.},
journal = {Journal of Machine Learning Research},
number = {1},
pages = {2069--2106},
title = {{Optimistic Bayesian sampling in contextual-bandit problems}},
volume = {13},
year = {2012}
}
@article{audibert2009exploration,
author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba},
journal = {Theoretical Computer Science},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1876--1902},
title = {{Exploration-exploitation trade-off using variance estimates in multi-armed bandits}},
volume = {410},
year = {2009}
}
@inproceedings{maillard2010online,
address = {Berlin, Heidelberg},
author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi},
booktitle = {Proceedings of the 2010 European Conference on Machine Learning and Knowledge Discovery in Databases: Part II},
isbn = {3-642-15882-X, 978-3-642-15882-7},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {305--320},
publisher = {Springer-Verlag},
series = {ECML PKDD'10},
title = {{Online learning in adversarial Lipschitz environments}},
year = {2010}
}
@inproceedings{bubeck2010open,
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi},
booktitle = {Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Open-loop optimistic planning}},
year = {2010}
}
@inproceedings{ghahramani2007,
address = {Corvalis, Oregon, USA},
booktitle = {Proceedings of the 24th International Conference on Machine Learning},
editor = {Ghahramani, Zoubin},
isbn = {978-1-59593-793-3},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
publisher = {ACM},
series = {ICML '07, ACM International Conference Proceeding Series},
title = {{No Title}},
volume = {227},
year = {2007}
}
@inproceedings{becker2002,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 16th conference on advances in Neural Information Processing Systems},
editor = {Becker, Suzanna and Thrun, Sebastian and Obermayer, Klaus},
isbn = {0-262-02550-7},
month = {dec},
publisher = {MIT Press},
series = {NIPS '02},
title = {{No Title}},
year = {2002}
}
@inproceedings{scholkopf2006,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 20th conference on advances in Neural Information Processing Systems},
editor = {Sch{\"{o}}lkopf, Bernhard and Platt, John C and Hoffman, Thomas},
isbn = {0-262-19568-2},
keywords = {bandits},
mendeley-tags = {bandits},
month = {dec},
publisher = {MIT Press},
series = {NIPS '06},
title = {{No Title}},
year = {2006}
}
@inproceedings{li2015online,
abstract = {Classical collaborative filtering, and content-based filtering methods try to learn a static recommendation model given training data. These approaches are far from ideal in highly dynamic recommendation domains such as news recommendation and computational advertisement, where the set of items and users is very fluid. In this work, we investigate an adaptive clustering technique for content recommendation based on exploration-exploitation strategies in contextual multi-armed bandit settings. Our algorithm takes into account the collaborative effects that arise due to the interaction of the users with the items, by dynamically grouping users based on the items under consideration and, at the same time, grouping items based on the similarity of the clusterings induced over the users. The resulting algorithm thus takes advantage of preference patterns in the data in a way akin to collaborative filtering methods. We provide an empirical analysis on medium-size real-world datasets, showing scalability and increased prediction performance (as measured by click-through rate) over state-of-the-art methods for clustering bandits. We also provide a regret analysis within a standard linear stochastic noise setting.},
author = {Li, Shuai and Karatzoglou, Alexandros and Gentile, Claudio},
booktitle = {Conference on Research and Development in Information Retrieval},
title = {{Collaborative Filtering Bandits}},
year = {2016}
}
@inproceedings{cesa-bianchi2013gang,
author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Zappella, Giovanni},
booktitle = {Neural Information Processing Systems},
title = {{A gang of bandits}},
year = {2013}
}
@inproceedings{dasgupta2009,
address = {Montreal, Quebec, Canada},
booktitle = {Proceedings of the 22nd annual Conference On Learning Theory},
editor = {Dasgupta, Sanjoy and Klivans, Adam},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jun},
series = {COLT '09},
title = {{No Title}},
year = {2009}
}
@article{Chen2014a,
abstract = {We define a general framework for a large class of combinatorial multi-armed bandit (CMAB) problems, where subsets of base arms with unknown distributions form super arms. In each round, a super arm is played and the base arms contained in the super arm are played and their outcomes are observed. We further consider the extension in which more based arms could be probabilistically triggered based on the outcomes of already triggered arms. The reward of the super arm depends on the outcomes of all played arms, and it only needs to satisfy two mild assumptions, which allow a large class of nonlinear reward instances. We assume the availability of an offline ($\backslash$alpha,$\backslash$beta)-approximation oracle that takes the means of the outcome distributions of arms and outputs a super arm that with probability {\{}$\backslash$beta{\}} generates an {\{}$\backslash$alpha{\}} fraction of the optimal expected reward. The objective of an online learning algorithm for CMAB is to minimize ($\backslash$alpha,$\backslash$beta)-approximation regret, which is the difference between the $\backslash$alpha{\{}$\backslash$beta{\}} fraction of the expected reward when always playing the optimal super arm, and the expected reward of playing super arms according to the algorithm. We provide CUCB algorithm that achieves O(log n) distribution-dependent regret, where n is the number of rounds played, and we further provide distribution-independent bounds for a large class of reward functions. Our regret analysis is tight in that it matches the bound of UCB1 algorithm (up to a constant factor) for the classical MAB problem, and it significantly improves the regret bound in a earlier paper on combinatorial bandits with linear rewards. We apply our CMAB framework to two new applications, probabilistic maximum coverage and social influence maximization, both having nonlinear reward structures. 
In particular, application to social influence maximization requires our extension on probabilistically triggered arms.},
author = {Chen, Wei and Wang, Yajun and Yuan, Yang},
month = {jul},
title = {{Combinatorial Multi-Armed Bandit and Its Extension to Probabilistically Triggered Arms}},
url = {http://arxiv.org/abs/1407.8339},
year = {2014}
}
@inproceedings{neu2013efficient,
author = {Neu, Gergely and Bart{\'{o}}k, G{\'{a}}bor},
booktitle = {Algorithmic Learning Theory},
title = {{An efficient algorithm for learning with semi-bandit feedback}},
year = {2013}
}
@inproceedings{lu2010contextual,
annote = {From Duplicate 1 ( Contextual Multi-Armed Bandits - Lu, Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin )
And Duplicate 4 ( Contextual Multi-Armed Bandits - Lu, Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin )
From Duplicate 1 ( Contextual Multi-Armed Bandits - Lu, Tyler; P{\'{a}}l, David; P{\'{a}}l, Martin )
},
author = {Lu, Tyler and P{\'{a}}l, D{\'{a}}vid and P{\'{a}}l, Martin},
booktitle = {Proceedings of the 13th international conference on Artificial Intelligence and Statistics},
editor = {Teh, Yee Whye and Titterington, Mike},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Lu, P{\'{a}}l, P{\'{a}}l - 2010 - Contextual Multi-Armed Bandits.pdf:pdf},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {485--492},
title = {{Contextual Multi-Armed Bandits}},
volume = {9},
year = {2010}
}
@inproceedings{platt2007,
address = {Vancouver, British Columbia, Canada},
booktitle = {Proceedings of the 21st conference on advances in Neural Information Processing Systems},
editor = {Platt, John C and Koller, Daphne and Singer, Yoram and Roweis, Sam T},
month = {dec},
publisher = {MIT Press},
series = {NIPS '07},
title = {{No Title}},
year = {2007}
}
@inproceedings{combes2014unimodal,
author = {Combes, Richard and Prouti{\`{e}}re, Alexandre},
booktitle = {International Conference on Machine Learning},
title = {{Unimodal bandits: Regret lower bounds and optimal algorithms}},
year = {2014}
}
@article{robbins1952some,
author = {Robbins, Herbert},
journal = {Bulletin of the American Mathematical Society},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {527--535},
title = {{Some aspects of the sequential design of experiments}},
volume = {58},
year = {1952}
}
@inproceedings{kawale2015efficient,
author = {Kawale, Jaya and Bui, Hung Hai and Kveton, Branislav and Tran-Thanh, Long and Chawla, Sanjay},
booktitle = {Neural Information Processing Systems},
title = {{Efficient Thompson sampling for online matrix-factorization recommendation}},
year = {2015}
}
@inproceedings{wen2013sequential,
author = {Wen, Zheng and Kveton, Branislav and Eriksson, Brian and Bhamidipati, Sandilya},
booktitle = {International Conference on Machine Learning},
title = {{Sequential Bayesian search}},
year = {2013}
}
@inproceedings{cicirello2005max,
abstract = {The multiarmed bandit is often used as an analogy for the tradeoff between exploration and exploitation in search problems. The classic problem involves allocating trials to the arms of a multiarmed slot machine to maximize the expected sum of rewards. We pose a new variation of the multiarmed bandit—the Max K-Armed Bandit—in which trials must be allocated among the arms to maximize the expected best single sample reward of the series of trials. Motivation for the Max K-Armed Bandit is the allocation of restarts among a set of multistart stochastic search algorithms. We present an analysis of this Max K-Armed Bandit showing under certain assumptions that the optimal strategy allocates trials to the observed best arm at a rate increasing double exponentially relative to the other arms. This motivates an exploration strategy that follows a Boltzmann distribution with an exponentially decaying temperature parameter. We compare this exploration policy to policies that allocate trials to the observed best arm at rates faster (and slower) than double exponentially. The results confirm, for two scheduling domains, that the double exponential increase in the rate of allocations to the observed best heuristic outperforms the other approaches.},
author = {Cicirello, Vincent A. and Smith, Stephen F.},
booktitle = {AAAI Conference on Artificial Intelligence},
title = {{The max k-armed bandit: A new model of exploration applied to search heuristic selection}},
url = {http://www.aaai.org/Papers/AAAI/2005/AAAI05-215.pdf},
year = {2005}
}
@inproceedings{audibert2007tuning,
author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba},
booktitle = {Algorithmic Learning Theory},
title = {{Tuning Bandit Algorithms in Stochastic Environments}},
year = {2007}
}
@inproceedings{valko2013finite,
abstract = {We tackle the problem of online reward maximisation over a large finite set of actions described by their contexts. We focus on the case when the number of actions is too big to sample all of them even once. However we assume that we have access to the similarities between actions' contexts and that the expected reward is an arbitrary linear function of the contexts' images in the related reproducing kernel Hilbert space (RKHS). We propose KernelUCB, a kernelised UCB algorithm, and give a cumulative regret bound through a frequentist analysis. For contextual bandits, the related algorithm GP-UCB turns out to be a special case of our algorithm, and our finite-time analysis improves the regret bound of GP-UCB for the agnostic case, both in the terms of the kernel-dependent quantity and the RKHS norm of the reward function. Moreover, for the linear kernel, our regret bound matches the lower bound for contextual linear bandits.},
author = {Valko, Michal and Korda, Nathaniel and Munos, R{\'{e}}mi and Flaounas, Ilias and Cristianini, Nello},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Finite-Time Analysis of Kernelised Contextual Bandits}},
year = {2013}
}
@article{agrawal1995continuum,
author = {Agrawal, R},
journal = {SIAM Journal on Control and Optimization},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1926--1951},
title = {{The continuum-armed bandit problem}},
volume = {33},
year = {1995}
}
@inproceedings{guillou2016scalable,
author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe},
booktitle = {Pacific Asia Conference on Information Systems},
title = {{Scalable explore-exploit collaborative filtering}},
year = {2016}
}
@inproceedings{abernethy2008competing,
author = {Abernethy, Jacob D and Hazan, Elad and Rakhlin, Alexander},
booktitle = {Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Competing in the dark: An efficient algorithm for bandit linear optimization.}},
year = {2008}
}
@inproceedings{kalenon,
author = {Kale, Satyen and Reyzin, Lev and Schapire, Robert E},
booktitle = {Neural Information Processing Systems},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1054--1062},
title = {{Non-Stochastic Bandit Slate Problems}},
year = {2010}
}
@article{rakhlin2010online,
author = {Rakhlin, Alexander and Sridharan, Karthik and Tewari, Ambuj},
journal = {ArXiv e-prints},
keywords = {bandits},
mendeley-tags = {bandits},
month = {nov},
title = {{Online Learning: Beyond Regret}},
year = {2010}
}
@book{Audibert2007a,
address = {Berlin, Heidelberg},
author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba},
editor = {Hutter, Marcus and Servedio, Rocco A. and Takimoto, Eiji},
issn = {0302-9743},
month = {oct},
pages = {150--165},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
title = {{Algorithmic Learning Theory}},
url = {http://dl.acm.org/citation.cfm?id=1422422.1422442},
volume = {4754},
year = {2007}
}
@incollection{filippi2010parametric,
author = {Filippi, Sarah and Capp{\'{e}}, Olivier and Garivier, Aur{\'{e}}lien and Szepesv{\'{a}}ri, Csaba},
booktitle = {Advances in Neural Information Processing Systems 23},
editor = {Lafferty, J and Williams, C K I and Shawe-Taylor, J and Zemel, R S and Culotta, A},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {586--594},
title = {{Parametric Bandits: The Generalized Linear Case}},
year = {2010}
}
@inproceedings{abbasi2011improved,
author = {Abbasi-Yadkori, Yasin and P{\'{a}}l, D{\'{a}}vid and Szepesv{\'{a}}ri, Csaba},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Downloads/linear-bandits-NIPS2011-camera-ready.pdf:pdf},
title = {{Improved algorithms for linear stochastic bandits}},
year = {2011}
}
@inproceedings{gu2014online,
author = {Gu, Quanquan and Han, Jiawei},
booktitle = {International Conference on Data Mining},
title = {{Online spectral learning on a graph with bandit feedback}},
year = {2014}
}
@article{munos2014from,
abstract = {This work covers several aspects of the optimism in the face of uncertainty principle applied to large scale optimization problems under finite numerical budget. The initial motivation for the research reported here originated from the empirical success of the so-called Monte-Carlo Tree Search method popularized in computer-go and further extended to many other games as well as optimization and planning problems. Our objective is to contribute to the development of theoretical foundations of the field by characterizing the complexity of the underlying optimization problems and designing efficient algorithms with performance guarantees. The main idea presented here is that it is possible to decompose a complex decision making problem (such as an optimization problem in a large search space) into a sequence of elementary decisions, where each decision of the sequence is solved using a (stochastic) multi-armed bandit (simple mathematical model for decision making in stochastic environments). This so-called hierarchical bandit approach (where the reward observed by a bandit in the hierarchy is itself the return of another bandit at a deeper level) possesses the nice feature of starting the exploration by a quasi-uniform sampling of the space and then focusing progressively on the most promising area, at different scales, according to the evaluations observed so far, and eventually performing a local search around the global optima of the function. The performance of the method is assessed in terms of the optimality of the returned solution as a function of the number of function evaluations. Our main contribution to the field of function optimization is a class of hierarchical optimistic algorithms designed for general search spaces (such as metric spaces, trees, graphs, Euclidean spaces, ...) 
with different algorithmic instantiations depending on whether the evaluations are noisy or noiseless and whether some measure of the ''smoothness'' of the function is known or unknown. The performance of the algorithms depend on the local behavior of the function around its global optima expressed in terms of the quantity of near-optimal states measured with some metric. If this local smoothness of the function is known then one can design very efficient optimization algorithms (with convergence rate independent of the space dimension), and when it is not known, we can build adaptive techniques that can, in some cases, perform almost as well as when it is known.},
author = {Munos, R{\'{e}}mi},
journal = {Foundations and Trends in Machine Learning},
pages = {1--130},
title = {{From bandits to Monte-Carlo tree search: The optimistic principle applied to optimization and planning}},
number = {1},
volume = {7},
year = {2014}
}
@inproceedings{kveton2013learning,
abstract = {Face recognition from a single image per person is a challenging problem because the training sample is extremely small. We consider a variation of this problem. In our problem, we recognize only one person, and there are no labeled data for any other person. This setting naturally arises in authentication on personal computers and mobile devices, and poses additional challenges because it lacks negative examples. We formalize our problem as one-class classification, and propose and analyze an algorithm that learns a non-parametric model of the face from a single labeled image and a stream of unlabeled data. In many domains, for instance when a person interacts with a computer with a camera, unlabeled data are abundant and easy to utilize. This is the first paper that investigates how these data can help in learning better models in the single-image-per-person setting. Our method is evaluated on a dataset of 43 people and we show that these people can be recognized 90{\%} of time at nearly zero false positives. This recall is 25+{\%} higher than the recall of our best performing baseline. Finally, we conduct a comprehensive sensitivity analysis of our algorithm and provide a guideline for setting its parameters in practice.},
address = {Shanghai, China},
author = {Kveton, Branislav and Valko, Michal},
booktitle = {10th IEEE International Conference on Automatic Face and Gesture Recognition},
title = {{Learning from a Single Labeled Face and a Stream of Unlabeled Data}},
year = {2013}
}
@article{Pis75,
author = {Pisier, G},
journal = {Israel Journal of Mathematics},
pages = {326--350},
title = {{Martingales with values in uniformly convex spaces}},
volume = {20},
year = {1975}
}
@article{wolfe1976finding,
author = {Wolfe, P},
journal = {Mathematical Programming},
number = {1},
pages = {128--149},
publisher = {Springer},
title = {{Finding the nearest point in a polytope}},
volume = {11},
year = {1976}
}
@book{dehaan2006extreme,
author = {de Haan, Laurens and Ferreira, Ana},
publisher = {Springer},
series = {Springer Series in Operations Research and Financial Engineering},
title = {{Extreme Value Theory: An Introduction}},
year = {2006}
}
@inproceedings{yang2006efficient,
annote = {comps{\_}distancX},
author = {Yang, Liu and Jin, Rong and Sukthankar, Rahul and Liu, Yi},
booktitle = {Proceedings, The Twenty-First National Conference on Artificial Intelligence and the Eighteenth Innovative Applications of Artificial Intelligence Conference, July 16-20, 2006, Boston, Massachusetts, USA},
title = {{An Efficient Algorithm for Local Distance Metric Learning}},
url = {http://www.cse.msu.edu/{~}yangliu1/aaai2006-distance-v7.pdf},
year = {2006}
}
@inproceedings{garcke2005semi,
author = {Garcke, Jochen and Griebel, Michael},
booktitle = {International Conference on Machine Learning},
title = {{Semi-supervised learning with sparse grids}},
year = {2005}
}
@inproceedings{bubeck2011lipschitz,
author = {Bubeck, S{\'{e}}bastien and Stoltz, Gilles and Yu, Jia Yuan},
booktitle = {Algorithmic Learning Theory},
title = {{Lipschitz Bandits without the Lipschitz Constant}},
year = {2011}
}
@inproceedings{koutis2012improved,
author = {Koutis, Ioannis and Levin, Alex and Peng, Richard},
booktitle = {STACS'12 (29th Symposium on Theoretical Aspects of Computer Science)},
organization = {LIPIcs},
pages = {266--277},
title = {{Improved spectral sparsification and numerical algorithms for SDD matrices}},
volume = {14},
year = {2012}
}
@article{Yasin2015,
author = {Yasin, M. A. and Al-Ashwal, W. A. M. and Shire, A. M. and Hamzah, S. A. and Ramli, K. N.},
isbn = {0-7803-3213-X},
issn = {18196608},
journal = {ARPN Journal of Engineering and Applied Sciences},
keywords = {Bluetooth,GSM,PIFA,Tri-band},
number = {19},
pages = {8740--8744},
title = {{Tri-band planar inverted F-antenna (PIFA) for GSM bands and bluetooth applications}},
volume = {10},
year = {2015}
}
@inproceedings{mahalanobis1936generalized,
author = {Mahalanobis, P},
booktitle = {Proc. National Inst. Sci. (India)},
pages = {49--55},
series = {12},
title = {{On the generalized distance in statistics}},
year = {1936}
}
@article{bach2011learning,
archivePrefix = {arXiv},
arxivId = {1111.6453},
author = {Bach, Francis},
eprint = {1111.6453},
keywords = {Learning,Optimization and Control},
title = {{Learning with Submodular Functions: A Convex Optimization Perspective}},
url = {http://arxiv.org/abs/1111.6453},
year = {2011}
}
@inproceedings{Cevher2009,
author = {Cevher, V and Indyk, P and Hegde, C and Baraniuk, R G},
booktitle = {Sampling Theory and Applications (SAMPTA)},
title = {{Recovery of clustered sparse signals from compressive measurements}},
year = {2009}
}
@article{GyLiLu08,
author = {Gy{\"{o}}rgy, A and Linder, T and Lugosi, G},
journal = {IEEE Transactions on Information Theory},
pages = {1604--1625},
title = {{Tracking the Best Quantizer}},
volume = {54},
year = {2008}
}
@incollection{bookchapter,
author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G},
booktitle = {Optimization for Machine Learning},
editor = {Sra, S and Nowozin, S and Wright, S J},
publisher = {MIT Press},
title = {{Convex optimization with sparsity-inducing norms}},
year = {2011}
}
@inproceedings{lowe1999object,
author = {Lowe, David G.},
booktitle = {IEEE International Conference on Computer Vision},
pages = {1150--1157},
title = {{Object Recognition from Local Scale-Invariant Features}},
volume = {2},
year = {1999}
}
@inproceedings{SST11,
author = {Srebro, N and Sridharan, K and Tewari, A},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
title = {{On the Universality of Online Mirror Descent}},
year = {2011}
}
@misc{doyle2000random,
abstract = {A popular account of the connection between random walks and electric
networks.},
annote = {* Fundamental matrix of the absorbing chain * Probabilistic interpretation of current and voltage},
author = {Doyle, Peter G and Snell, Laurie J},
keywords = {resistance},
month = {jan},
title = {{Random Walks and Electric Networks}},
url = {http://arxiv.org/abs/math/0001057},
year = {2000}
}
@article{spectralbandits,
author = {Authors},
journal = {Supplementary Material},
title = {{Spectral Bandits for Smooth Graph Functions}}
}
@inproceedings{valko2010online,
author = {Valko, Michal and Kveton, Branislav and Huang, Ling and Ting, Daniel},
booktitle = {Proceedings of the 26th Conference on Uncertainty in Artificial Intelligence},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{Online semi-supervised learning on quantized graphs}},
url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2010online.pdf},
year = {2010}
}
@phdthesis{bubeck2010bandits,
author = {Bubeck, S{\'{e}}bastien},
keywords = {bandits},
mendeley-tags = {bandits},
school = {Universit{\'{e}} de Lille 1},
title = {{Bandits Games and Clustering Foundations}},
year = {2010}
}
@inproceedings{das2008anomaly,
address = {New York, NY, USA},
author = {Das, Kaustav and Schneider, Jeff and Neill, Daniel B},
booktitle = {Proceeding of the 14th ACM SIGKDD international conference on Knowledge discovery and data mining},
isbn = {978-1-60558-193-4},
keywords = {anomaly detection,machine learning,pattern detection},
pages = {169--176},
publisher = {ACM},
series = {KDD '08},
title = {{Anomaly pattern detection in categorical datasets}},
year = {2008}
}
@techreport{dymetman2012os,
abstract = {Most current sampling algorithms for high-dimensional distributions are based on MCMC techniques and are approximate in the sense that they are valid only asymptotically. Rejection sampling, on the other hand, produces valid samples, but is unrealistically slow in high-dimension spaces. The OS* algorithm that we propose is a unified approach to exact optimization and sampling, based on incremental refinements of a functional upper bound, which combines ideas of adaptive rejection sampling and of A* optimization search. We show that the choice of the refinement can be done in a way that ensures tractability in high-dimension spaces, and we present first experiments in two different settings: inference in high-order HMMs and in large discrete graphical models.},
archivePrefix = {arXiv},
arxivId = {1207.0742},
author = {Dymetman, Marc and Bouchard, Guillaume and Carter, Simon},
eprint = {1207.0742},
institution = {http://arxiv.org/abs/1207.0742},
title = {{The OS* algorithm: A joint approach to exact optimization and sampling}},
year = {2012}
}
@article{zhao2003face,
author = {Zhao, Wen-Yi and Chellappa, Rama and Phillips, P and Rosenfeld, Azriel},
journal = {ACM Computing Surveys},
number = {4},
pages = {399--458},
title = {{Face Recognition: A Literature Survey}},
volume = {35},
year = {2003}
}
@book{chapelle2010semi-supervised,
author = {Chapelle, Olivier and Sch{\"{o}}lkopf, Bernhard and Zien, Alexander},
publisher = {The MIT Press},
title = {{Semi-Supervised Learning}},
year = {2010}
}
@book{Nesterov2004,
author = {Nesterov, Yurii},
publisher = {Kluwer Academic Publishers},
title = {{Introductory lectures on convex optimization: A basic course}},
year = {2004}
}
@article{dunn1980convergence,
author = {Dunn, J C},
journal = {SIAM Journal on Control and Optimization},
pages = {473--487},
title = {{Convergence rates for conditional gradient sequences generated by implicit step length rules}},
volume = {18},
year = {1980}
}
@inproceedings{neu12ssp-trans,
author = {Neu, Gergely and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba},
booktitle = {International Conference on Artificial Intelligence and Statistics},
pages = {805--813},
title = {{The adversarial stochastic shortest path problem with unknown transition probabilities}},
year = {2012}
}
@article{gittins1979bandit,
abstract = {The paper aims to give a unified account of the central concepts in recent work on bandit processes and dynamic allocation indices; to show how these reduce some previously intractable problems to the problem of calculating such indices; and to describe how these calculations may be carried out. Applications to stochastic scheduling, sequential clinical trials and a class of search problems are discussed.},
author = {Gittins, J C},
doi = {10.2307/2985029},
issn = {00359246},
journal = {Journal of the Royal Statistical Society Series B Methodological},
number = {2},
pages = {148--177},
publisher = {JSTOR},
series = {B},
title = {{Bandit processes and dynamic allocation indices}},
url = {http://www.jstor.org/stable/2985029},
volume = {41},
year = {1979}
}
@inproceedings{NgJo01,
author = {Ng, Andrew Y and Jordan, Michael I},
pages = {841--848},
title = {{On Discriminative vs. Generative Classifiers: A Comparison of Logistic Regression and Naive Bayes}}
}
@article{duane1987hybrid,
author = {Duane, Simon and Kennedy, A D and Pendleton, Brian and Roweth, Duncan},
journal = {Physics Letters B},
number = {2},
pages = {216--222},
title = {{Hybrid {Monte Carlo}}},
volume = {195},
year = {1987}
}
@article{rivas99dynamic,
address = {Department of Genetics, Washington University, St. Louis, MO, 63110, USA.},
author = {Rivas, E and Eddy, S R},
doi = {10.1006/jmbi.1998.2436},
issn = {0022-2836},
journal = {Journal of Molecular Biology},
keywords = {folding,pseudoknot,rna},
number = {5},
pages = {2053--2068},
title = {{A dynamic programming algorithm for {RNA} structure prediction including pseudoknots}},
url = {http://dx.doi.org/10.1006/jmbi.1998.2436},
volume = {285},
year = {1999}
}
@inproceedings{sha2003shallow,
address = {Morristown, NJ, USA},
annote = {c{\_}omps{\_}models},
author = {Sha, Fei and Pereira, Fernando},
booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
doi = {10.3115/1073445.1073473},
pages = {134--141},
publisher = {Association for Computational Linguistics},
title = {{Shallow parsing with conditional random fields}},
year = {2003}
}
@article{Batson:2013:SSG:2492007.2492029,
author = {Batson, Joshua and Spielman, Daniel A and Srivastava, Nikhil and Teng, Shang-Hua},
journal = {Commun. ACM},
number = {8},
pages = {87--94},
title = {{Spectral Sparsification of Graphs: Theory and Algorithms}},
volume = {56},
year = {2013}
}
@book{Dud99,
author = {Dudley, R},
publisher = {Cambridge University Press},
title = {{Uniform Central Limit Theorems}},
year = {1999}
}
@book{chung1997spectral,
author = {Chung, Fan},
keywords = {imported},
publisher = {American Mathematical Society},
title = {{Spectral Graph Theory}},
year = {1997}
}
@article{Bubeck2013,
abstract = {The stochastic multiarmed bandit problem is well understood when the reward distributions are sub-Gaussian. In this paper, we examine the bandit problem under the weaker assumption that the distributions have moments of order 1 + $\epsilon$, for some $\epsilon \in (0,1]$. Surprisingly, moments of order 2 (i.e., finite variance) are sufficient to obtain regret bounds of the same order as under sub-Gaussian reward distributions. In order to achieve such regret, we define sampling strategies based on refined estimators of the mean such as the truncated empirical mean, Catoni's M-estimator, and the median-of-means estimator. We also derive matching lower bounds that also show that the best achievable regret deteriorates when $\epsilon < 1$.},
archivePrefix = {arXiv},
arxivId = {arXiv:1209.1727v1},
author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}} and Lugosi, G{\'{a}}bor},
doi = {10.1109/TIT.2013.2277869},
eprint = {arXiv:1209.1727v1},
issn = {00189448},
journal = {IEEE Transactions on Information Theory},
keywords = {Heavy-tailed distributions,regret bounds,robust estimators,stochastic multi-armed bandit},
number = {11},
pages = {7711--7717},
title = {{Bandits with heavy tail}},
volume = {59},
year = {2013}
}
@article{edmonds70submodular,
abstract = {The viewpoint of the subject of matroids, and related areas of lattice theory, has always been, in one way or another, abstraction of algebraic dependence or, equivalently, abstraction of the incidence relations in geometric representations of algebra. Often one of the main derived facts is that all bases have the same cardinality. (See Van der Waerden, Section 33.)},
author = {Edmonds, Jack},
journal = {Combinatorial Structures and Their Applications},
pages = {69--87},
title = {{Submodular functions, matroids, and certain polyhedra}},
year = {1970}
}
@inproceedings{babenko2009visual,
author = {Babenko, Boris and Yang, Ming-Hsuan and Belongie, Serge},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
title = {{Visual Tracking with Online Multiple Instance Learning}},
year = {2009}
}
@inproceedings{quattoni2009efficient,
author = {Quattoni, A and Carreras, X and Collins, M and Darrell, T},
booktitle = {Proc. ICML},
title = {{An efficient projection for $\ell_1$-$\ell_\infty$ regularization}},
year = {2009}
}
@inproceedings{lin2011-class-submod-sum,
address = {Portland, OR},
annote = {(long paper)},
author = {Lin, H and Bilmes, J},
booktitle = {North American chapter of the Association for Computational Linguistics/Human Language Technology Conference (NAACL/HLT-2011)},
month = jun,
title = {{A Class of Submodular Functions for Document Summarization}},
year = {2011}
}
@inproceedings{HaSe09,
author = {Hazan, E and Seshadhri, C},
pages = {393--400},
title = {{Efficient learning algorithms for changing environments}}
}
@book{rockafellar97,
author = {Rockafellar, R T},
publisher = {Princeton University Press},
title = {{Convex Analysis}},
year = {1997}
}
@misc{gyorfi08survey,
address = {Tuebingen, Germany},
author = {Gy{\H{o}}rfi, L and Ottucs{\'{a}}k, Gy. and Urb{\'{a}}n, A},
howpublished = {Machine Learning Summer School 2007, MLSS 2007 (invited lecture)},
title = {{Empirical log-optimal portfolio selections: a survey}},
year = {2008}
}
@article{bar-hillel2005learning,
annote = {comps{\_}distance},
author = {Bar-Hillel, Aharon and Hertz, Tomer and Shental, Noam and Weinshall, Daphna},
journal = {Journal of Machine Learning Research},
pages = {937--965},
title = {{Learning a Mahalanobis Metric from Equivalence Constraints}},
url = {http://www.jmlr.org/papers/v6/bar-hillel05a.html},
volume = {6},
year = {2005}
}
@inproceedings{zinkevich2003online,
author = {Zinkevich, Martin},
booktitle = {Proceedings of the 20th International Conference on Machine Learning},
pages = {928--936},
title = {{Online Convex Programming and Generalized Infinitesimal Gradient Ascent}},
year = {2003}
}
@inproceedings{koolen10comphedge,
author = {Koolen, Wouter M. and Warmuth, Manfred K. and Kivinen, Jyrki},
booktitle = {Conference on Learning Theory},
title = {{Hedging structured concepts}},
year = {2010}
}
@inproceedings{weinberger2007metric,
annote = {comps{\_}distance},
author = {Weinberger, K Q and Tesauro, G},
booktitle = {Proc. of the 11th International Conference on Artificial Intelligence and Statistics},
title = {{Metric Learning for Kernel Regression}},
url = {http://www.stat.umn.edu/{~}aistat/proceedings/data/papers/077.pdf},
year = {2007}
}
@article{martino2011generalization,
author = {Martino, Luca and M{\'{i}}guez, Joaqu{\'{i}}n},
journal = {Statistics and Computing},
number = {4},
pages = {633--647},
title = {{A generalization of the adaptive rejection sampling algorithm}},
volume = {21},
year = {2011}
}
@article{rosset,
author = {Rosset, S and Zhu, J},
journal = {Ann. Statist.},
number = {3},
pages = {1012--1030},
title = {{Piecewise linear regularized solution paths}},
volume = {35},
year = {2007}
}
@inproceedings{priebe2005scan,
abstract = {We introduce a theory of scan statistics on graphs and apply the ideas to the problem of anomaly detection in a time series of Enron email graphs.},
author = {Priebe, Carey E. and Conroy, John M. and Marchette, David J. and Park, Youngser},
booktitle = {Computational and Mathematical Organization Theory},
doi = {10.1007/s10588-005-5378-z},
issn = {1381-298X},
pages = {229--247},
title = {{Scan Statistics on Enron Graphs}},
volume = {11},
year = {2005}
}
@article{harchaoui2008catching,
author = {Harchaoui, Z and L{\'{e}}vy-Leduc, C},
journal = {Adv. NIPS},
title = {{Catching change-points with {Lasso}}},
volume = {20},
year = {2008}
}
@inproceedings{kocsis2006bandit,
abstract = {For large state-space Markovian Decision Problems Monte-Carlo planning is one of the few viable approaches to find near-optimal solutions. In this paper we introduce a new algorithm, UCT, that applies bandit ideas to guide Monte-Carlo planning. In finite-horizon or discounted MDPs the algorithm is shown to be consistent and finite sample bounds are derived on the estimation error due to sampling. Experimental results show that in several domains, UCT is significantly more efficient than its alternatives.},
author = {Kocsis, Levente and Szepesv{\'{a}}ri, Csaba},
booktitle = {European Conference on Machine Learning},
keywords = {bandits},
mendeley-tags = {bandits},
title = {{Bandit-based Monte-Carlo planning}},
year = {2006}
}
@inproceedings{K01,
author = {Kakade, Sham},
booktitle = {Advances in Neural Information Processing Systems 14 (NIPS)},
pages = {1531--1538},
title = {{A Natural Policy Gradient}},
year = {2001}
}
@techreport{statscience,
author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G},
institution = {HAL},
number = {00621245},
title = {{Structured sparsity through convex optimization}},
year = {2011}
}
@book{neuts1981matrix-geometric,
address = {Baltimore, MD},
author = {Neuts, Marcel},
publisher = {Johns Hopkins University Press},
title = {{Matrix-Geometric Solutions in Stochastic Models: An Algorithmic Approach}},
year = {1981}
}
@techreport{chekuri2011submodular,
author = {Chekuri, C and Vondr{\'{a}}k, J and Zenklusen, R},
institution = {Arxiv},
number = {1105.4593},
title = {{Submodular function maximization via the multilinear relaxation and contention resolution schemes}},
year = {2011}
}
@inproceedings{silva,
author = {da Silva, V F and Costa, A H R and Lima, P},
pages = {4246--4251},
title = {{Inverse Reinforcement Learning with Evaluation}}
}
@incollection{he2008nearest-neighbor-based,
address = {Cambridge, MA},
annote = {comps{\_}ano},
author = {He, Jingrui and Carbonell, Jaime},
booktitle = {Advances in Neural Information Processing Systems 20},
editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S},
pages = {633--640},
publisher = {MIT Press},
title = {{Nearest-Neighbor-Based Active Learning for Rare Category Detection}},
url = {http://books.nips.cc/papers/files/nips20/NIPS2007{\_}0051.pdf},
year = {2008}
}
@article{edmonds71matroids,
author = {Edmonds, Jack},
journal = {Mathematical Programming},
number = {1},
pages = {127--136},
title = {{Matroids and the Greedy Algorithm}},
volume = {1},
year = {1971}
}
@incollection{bengio2004out-of-sample,
address = {Cambridge, MA},
author = {Bengio, Yoshua and Paiement, Jean-Fran{\c{c}}ois and Vincent, Pascal and Delalleau, Olivier and {Le Roux}, Nicolas and Ouimet, Marie},
booktitle = {Advances in Neural Information Processing Systems 16},
editor = {Thrun, Sebastian and Saul, Lawrence and Sch{\"{o}}lkopf, Bernhard},
keywords = {Isomap,Nystrom formula,dimensionality reduction,eigenfunctions learning,kernel PCA,locally linear embedding,spectral methods},
publisher = {MIT Press},
title = {{Out-of-Sample Extensions for {LLE}, Isomap, {MDS}, Eigenmaps, and Spectral Clustering}},
year = {2004}
}
@article{boucheron2012concentration,
abstract = {This note describes non-asymptotic variance and tail bounds for order statistics of samples of independent identically distributed random variables. Those bounds are checked to be asymptotically tight when the sampling distribution belongs to a maximum domain of attraction. If the sampling distribution has non-decreasing hazard rate (this includes the Gaussian distribution), we derive an exponential Efron-Stein inequality for order statistics: an inequality connecting the logarithmic moment generating function of centered order statistics with exponential moments of Efron-Stein (jackknife) estimates of variance. We use this general connection to derive variance and tail bounds for order statistics of Gaussian sample. Those bounds are not within the scope of the Tsirelson-Ibragimov-Sudakov Gaussian concentration inequality. Proofs are elementary and combine R{\'{e}}nyi's representation of order statistics and the so-called entropy approach to concentration inequalities popularized by M. Ledoux.},
archivePrefix = {arXiv},
arxivId = {1207.7209},
author = {Boucheron, St{\'{e}}phane and Thomas, Maud},
doi = {10.1214/ECP.v17-2210},
eprint = {1207.7209},
issn = {1083589X},
journal = {Electronic Communications in Probability},
keywords = {Concentration inequalities,Efron-Stein inequalities,Entropy method,Order statistics,Renyi's representation},
title = {{Concentration inequalities for order statistics}},
volume = {17},
year = {2012}
}
@article{TW03,
author = {Takimoto, E and Warmuth, M},
journal = {Journal of Machine Learning Research},
pages = {773--818},
title = {{Path kernels and multiplicative updates}},
volume = {4},
year = {2003}
}
@incollection{GP91,
author = {Gupta, S and Panchapakesan, S},
booktitle = {Handbook of Sequential Analysis},
editor = {Ghosh, B and Sen, P},
pages = {363--380},
title = {{On sequential ranking and selection procedures}},
year = {1991}
}
@article{bertsekas1995counterexample,
author = {Bertsekas, Dimitri},
journal = {Neural Computation},
number = {2},
pages = {270--279},
title = {{A Counterexample for Temporal Differences Learning}},
volume = {7},
year = {1995}
}
@inproceedings{neu2014online,
abstract = {Most work on sequential learning assumes a fixed set of actions that are available all the time. However, in practice, actions can consist of picking subsets of readings from sensors that may break from time to time, road segments that can be blocked or goods that are out of stock. In this paper we study learning algorithms that are able to deal with stochastic availability of such unreliable composite actions. We propose and analyze algorithms based on the Follow-The-Perturbed-Leader prediction method for several learning settings differing in the feedback provided to the learner. Our algorithms rely on a novel loss estimation technique that we call Counting Asleep Times. We deliver regret bounds for our algorithms for the previously studied full information and (semi-)bandit settings, as well as a natural middle point between the two that we call the restricted information setting. A special consequence of our results is a significant improvement of the best known performance guarantees achieved by an efficient algorithm for the sleeping bandit problem with stochastic availability. Finally, we evaluate our algorithms empirically and show their improvement over the known approaches.},
author = {Neu, Gergely and Valko, Michal},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Neu, Valko - 2014 - Online combinatorial optimization with stochastic decision sets and adversarial losses.pdf:pdf},
title = {{Online combinatorial optimization with stochastic decision sets and adversarial losses}},
year = {2014}
}
@inproceedings{ng2001spectral,
abstract = {Despite many empirical successes of spectral clustering methods---algorithms that cluster points using eigenvectors of matrices derived from the data---there are several unresolved issues. First, there are a wide variety of algorithms that use the eigenvectors in slightly different ways. Second, many of these algorithms have no proof that they will actually compute a reasonable clustering. In this paper, we present a simple spectral clustering algorithm that can be implemented using a few lines of Matlab. Using tools from matrix perturbation theory, we analyze the algorithm, and give conditions under which it can be expected to do well. We also show surprisingly good experimental results on a number of challenging clustering problems.},
author = {Ng, Andrew Y and Jordan, Michael I and Weiss, Yair},
booktitle = {Neural Information Processing Systems},
keywords = {clustering community detection graph spectral theo},
title = {{On spectral clustering: Analysis and an algorithm}},
year = {2001}
}
@inproceedings{he2007graph-based,
address = {San Francisco, CA, USA},
author = {He, Jingrui and Carbonell, Jaime and Liu, Yan},
booktitle = {Proceedings of the 20th international joint conference on Artifical intelligence},
pages = {2492--2497},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Graph-based semi-supervised learning as a generative model}},
url = {http://portal.acm.org/citation.cfm?id=1625275.1625677},
year = {2007}
}
@article{lanczos1950iteration,
abstract = {The present investigation designs a systematic method for finding the latent roots and the principal axes of a matrix, without reducing the order of the matrix. It is characterized by a wide field of applicability and great accuracy, since the accumulation of rounding errors is avoided, through the process of "minimized iterations". Moreover, the method leads to a well convergent successive approximation procedure by which the solution of integral equations of the Fredholm type and the solution of the eigenvalue problem of linear differential and integral operators may be accomplished.},
author = {Lanczos, C.},
doi = {10.6028/jres.045.026},
issn = {0091-0635},
journal = {Journal of Research of the National Bureau of Standards},
number = {4},
pages = {255},
title = {{An iteration method for the solution of the eigenvalue problem of linear differential and integral operators}},
url = {http://nvlpubs.nist.gov/nistpubs/jres/045/jresv45n4p255{\_}A1b.pdf},
volume = {45},
year = {1950}
}
@inproceedings{globerson2006metric,
address = {Cambridge, MA},
annote = {comps{\_}distance},
author = {Globerson, Amir and Roweis, Sam},
booktitle = {Advances in Neural Information Processing Systems 18},
editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J},
pages = {451--458},
publisher = {MIT Press},
title = {{Metric Learning by Collapsing Classes}},
url = {http://books.nips.cc/papers/files/nips18/NIPS2005{\_}0388.pdf},
year = {2006}
}
@inproceedings{blum2001learning,
address = {San Francisco, CA, USA},
author = {Blum, Avrim and Chawla, Shuchi},
booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning},
isbn = {1-55860-778-1},
pages = {19--26},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Learning from Labeled and Unlabeled Data using Graph Mincuts}},
year = {2001}
}
@book{hastie2001elements,
abstract = {During the past decade there has been an explosion in computation
and information technology. With it has come vast amounts of data
in a variety of fields such as medicine, biology, finance, and marketing.
The challenge of understanding these data has led to the development
of new tools in the field of statistics, and spawned new areas such
as data mining, machine learning, and bioinformatics. Many of these
tools have common underpinnings but are often expressed with different
terminology. This book describes the important ideas in these areas
in a common conceptual framework. While the approach is statistical,
the emphasis is on concepts rather than mathematics. Many examples
are given, with a liberal use of color graphics. It should be a valuable
resource for statisticians and anyone interested in data mining in
science or industry. The book's coverage is broad, from supervised
learning (prediction) to unsupervised learning. The many topics include
neural networks, support vector machines, classification trees and
boosting-the first comprehensive treatment of this topic in any book.
Trevor Hastie, Robert Tibshirani, and Jerome Friedman are professors
of statistics at Stanford University. They are prominent researchers
in this area: Hastie and Tibshirani developed generalized additive
models and wrote a popular book of that title. Hastie wrote much
of the statistical modeling software in S-PLUS and invented principal
curves and surfaces. Tibshirani proposed the Lasso and is co-author
of the very successful An Introduction to the Bootstrap. Friedman
is the co-inventor of many data-mining tools including CART, MARS,
and projection pursuit.},
author = {Hastie, T and Tibshirani, R and Friedman, J H},
howpublished = {Hardcover},
isbn = {0387952845},
keywords = {machine-learning,statistic},
month = aug,
publisher = {Springer},
title = {{The Elements of Statistical Learning}},
year = {2001}
}
@inproceedings{audibert09minimax,
annote = {From Duplicate 1 (Minimax Policies for Bandits Games - Audibert, J.-Y.; Bubeck, S)
Submitted},
author = {Audibert, J.-Y. and Bubeck, S},
booktitle = {Journal of Machine Learning Research},
title = {{Minimax Policies for Bandits Games}},
year = {2010}
}
@inproceedings{taskar05learning,
author = {Taskar, Ben and Chatalbashev, Vassil and Koller, Daphne and Guestrin, Carlos},
pages = {896--903},
title = {{Learning structured prediction models: a large margin approach}}
}
@article{jenatton2009structured,
author = {Jenatton, R and Audibert, J-Y. and Bach, F},
journal = {Journal of Machine Learning Research},
pages = {2777--2824},
title = {{Structured Variable Selection with Sparsity-Inducing Norms}},
volume = {12},
year = {2011}
}
@article{jones1993lipschitzian,
abstract = {We present a new algorithm for finding the global minimum of a multivariate function subject to simple bounds. The algorithm is a modification of the standard Lipschitzian approach that eliminates the need to specify a Lipschitz constant. This is done by carrying out simultaneous searches using all possible constants from zero to infinity. On nine standard test functions, the new algorithm converges in fewer function evaluations than most competing methods.},
author = {Jones, David and Perttunen, Cary and Stuckman, Bruce},
doi = {10.1007/BF00941892},
issn = {00223239},
journal = {Journal of Optimization Theory and Applications},
number = {1},
pages = {157--181},
publisher = {Springer},
title = {{Lipschitzian optimization without the Lipschitz constant}},
url = {http://www.springerlink.com/index/10.1007/BF00941892},
volume = {79},
year = {1993}
}
@incollection{joshi97treeadjoining,
address = {Berlin, New York},
author = {Joshi, Aravind K and Schabes, Yves},
booktitle = {Handbook of Formal Languages},
pages = {69--124},
publisher = {Springer},
title = {{Tree-Adjoining Grammars}},
url = {citeseer.ist.psu.edu/joshi97treeadjoining.html},
volume = {3},
year = {1997}
}
@article{CBCoGe04,
author = {Cesa-Bianchi, N and Conconi, A and Gentile, C},
journal = {IEEE Transactions on Information Theory},
pages = {2050--2057},
title = {{On the Generalization Ability of On-Line Learning Algorithms}},
volume = {50},
year = {2004}
}
@book{lauritzen96graphical,
author = {Lauritzen, S L},
howpublished = {Hardcover},
month = jul,
publisher = {Oxford University Press, USA},
title = {{Graphical Models (Oxford Statistical Science Series)}},
year = {1996}
}
@inproceedings{keshavan2009matrix,
author = {Keshavan, Raghunandan and Oh, Sewoong and Montanari, Andrea},
booktitle = {International Symposium on Information Theory},
title = {{Matrix Completion from a Few Entries}},
year = {2009}
}
@inproceedings{shental2002adjustment,
address = {London, UK},
author = {Shental, Noam and Hertz, Tomer and Weinshall, Daphna and Pavel, Misha},
booktitle = {ECCV '02: Proceedings of the 7th European Conference on Computer Vision-Part IV},
isbn = {3-540-43748-7},
pages = {776--792},
publisher = {Springer-Verlag},
title = {{Adjustment Learning and Relevant Component Analysis}},
year = {2002}
}
@phdthesis{vanroy1998planning,
author = {{Van Roy}, Benjamin},
school = {Massachusetts Institute of Technology},
title = {{Planning Under Uncertainty in Complex Structured Environments}},
year = {1998}
}
@article{hill1975simple,
author = {Hill, Bruce M.},
journal = {The Annals of Statistics},
keywords = {Bayesian inference,Tail of distribution,order statistics},
language = {EN},
number = {5},
pages = {1163--1174},
publisher = {Institute of Mathematical Statistics},
title = {{A Simple General Approach to Inference About the Tail of a Distribution}},
volume = {3},
year = {1975}
}
@article{guestrin2003efficient,
author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald and Venkataraman, Shobha},
journal = {Journal of Artificial Intelligence Research},
pages = {399--468},
title = {{Efficient Solution Algorithms for Factored {MDPs}}},
volume = {19},
year = {2003}
}
@inproceedings{kveton2007adaptive,
author = {Kveton, Branislav and Gandhi, Prashant and Theocharous, Georgios and Mannor, Shie and Rosario, Barbara and Shah, Nilesh},
booktitle = {Proceedings of the 22nd National Conference on Artificial Intelligence},
pages = {1795--1800},
title = {{Adaptive Timeout Policies for Fast Fine-Grained Power Management}},
year = {2007}
}
@article{gallo1989fast,
author = {Gallo, G and Grigoriadis, M D and Tarjan, R E},
journal = {SIAM Journal on Computing},
number = {1},
pages = {30--55},
title = {{A fast parametric maximum flow algorithm and applications}},
volume = {18},
year = {1989}
}
@article{bellman1963polynomial,
author = {Bellman, Richard and Kalaba, Robert and Kotkin, Bella},
journal = {Mathematics of Computation},
number = {82},
pages = {155--161},
title = {{Polynomial Approximation -- A New Computational Technique in Dynamic Programming: Allocation Processes}},
volume = {17},
year = {1963}
}
@inproceedings{kocak2014spectral,
abstract = {Thompson Sampling (TS) has surged a lot of interest due to its good empirical performance, in particular in the computational advertising. Though successful, the tools for its performance analysis appeared only recently. In this paper, we describe and analyze SpectralTS algorithm for a bandit problem, where the payoffs of the choices are smooth given an underlying graph. In this setting, each choice is a node of a graph and the expected payoffs of the neighboring nodes are assumed to be similar. Although the setting has application both in recommender systems and advertising, the traditional algorithms would scale poorly with the number of choices. For that purpose we consider an effective dimension d, which is small in real-world graphs. We deliver the analysis showing that the regret of SpectralTS scales as $d\sqrt{T \ln N}$ with high probability, where T is the time horizon and N is the number of choices. Since a $d\sqrt{T \ln N}$ regret is comparable to the known results, SpectralTS offers a computationally more efficient alternative. We also show that our algorithm is competitive on both synthetic and real-world data.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Valko, Michal and Munos, R{\'{e}}mi and Agrawal, Shipra},
booktitle = {AAAI Conference on Artificial Intelligence},
title = {{Spectral Thompson sampling}},
year = {2014}
}
@book{BoLuMa13,
author = {Boucheron, S and Lugosi, G and Massart, P},
publisher = {Oxford University Press},
title = {{Concentration Inequalities: A Nonasymptotic Theory of Independence}},
year = {2013}
}
@inproceedings{kanade2009sleeping,
author = {Kanade, Varun and McMahan, H Brendan and Bryan, Brent},
booktitle = {Proceedings of the 12th international conference on Artificial Intelligence and Statistics},
keywords = {bandits},
mendeley-tags = {bandits},
number = {5},
pages = {272--279},
series = {AI{\&}Stats '09},
title = {{Sleeping Experts and Bandits with Stochastic Action Availability and Adversarial Rewards}},
year = {2009}
}
@inproceedings{hauskrecht2010conditional,
abstract = {We develop and evaluate a data-driven approach for detecting unusual (anomalous) patient-management actions using past patient cases stored in an electronic health record (EHR) system. Our hypothesis is that patient-management actions that are unusual with respect to past patients may be due to a potential error and that it is worthwhile to raise an alert if such a condition is encountered. We evaluate this hypothesis using data obtained from the electronic health records of 4,486 post-cardiac surgical patients. We base the evaluation on the opinions of a panel of experts. The results support that anomaly-based alerting can have reasonably low false alert rates and that stronger anomalies are correlated with higher alert rates.},
author = {Hauskrecht, Milos and Valko, Michal and Batal, Iyad and Clermont, Gilles and Visweswaran, Shyam and Cooper, Gregory F},
booktitle = {Annual American Medical Informatics Association Symposium},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{Conditional outlier detection for clinical alerting}},
year = {2010}
}
@article{Beck2009,
author = {Beck, A and Teboulle, M},
journal = {SIAM Journal on Imaging Sciences},
number = {1},
pages = {183--202},
title = {{A fast iterative shrinkage-thresholding algorithm for linear inverse problems}},
volume = {2},
year = {2009}
}
@book{NY83,
author = {Nemirovski, A and Yudin, D},
publisher = {Wiley Interscience},
title = {{Problem Complexity and Method Efficiency in Optimization}},
year = {1983}
}
@inproceedings{grudic2000localizing,
author = {Grudic, Gregory and Ungar, Lyle},
booktitle = {Proceedings of 17th International Conference on Machine Learning},
pages = {343--350},
title = {{Localizing Policy Gradient Estimates to Action Transitions}},
year = {2000}
}
@inproceedings{Huang2009,
author = {Huang, J and Zhang, T and Metaxas, D},
booktitle = {Proceedings of the International Conference on Machine Learning (ICML)},
title = {{Learning with structured sparsity}},
year = {2009}
}
@book{Mas06,
author = {Massart, P},
publisher = {Springer},
title = {{{\'{E}}cole d'{\'{E}}t{\'{e}} de Probabilit{\'{e}}s de Saint-Flour XXXIII - 2003}},
year = {2006}
}
@article{luxburg2007tutorial,
author = {von Luxburg, Ulrike},
journal = {Statistics and Computing},
number = {4},
pages = {395--416},
title = {{A tutorial on spectral clustering}},
volume = {17},
year = {2007}
}
@inproceedings{saluja2014graph,
author = {Saluja, Avneesh and Hassan, Hany and Toutanova, Kristina and Quirk, Chris},
booktitle = {Proceedings of ACL'14},
title = {{Graph-based semi-supervised learning of translation models from monolingual data}},
year = {2014}
}
@article{isaac2009overrides,
abstract = {BACKGROUND: Electronic prescribing systems with decision support may
improve patient safety in ambulatory care by offering drug allergy
and drug interaction alerts. However, preliminary studies show that
clinicians override most of these alerts. METHODS: We performed a
retrospective analysis of 233 537 medication safety alerts generated
by 2872 clinicians in Massachusetts, New Jersey, and Pennsylvania
who used a common electronic prescribing system from January 1, 2006,
through September 30, 2006. We used multivariate techniques to examine
factors associated with alert acceptance. RESULTS: A total of 6.6{\%}
of electronic prescription attempts generated alerts. Clinicians
accepted 9.2{\%} of drug interaction alerts and 23.0{\%} of allergy alerts.
High-severity interactions accounted for most alerts (61.6{\%}); clinicians
accepted high-severity alerts slightly more often than moderate-
or low-severity interaction alerts (10.4{\%}, 7.3{\%}, and 7.1{\%}, respectively;
P {\textless} .001). Clinicians accepted 2.2{\%} to 43.1{\%} of high-severity interaction
alerts, depending on the classes of interacting medications. In multivariable
analyses, we found no difference in alert acceptance among clinicians
of different specialties (P = .16). Clinicians were less likely to
accept a drug interaction alert if the patient had previously received
the alerted medication (odds ratio, 0.03; 95{\%} confidence interval,
0.03-0.03). CONCLUSION: Clinicians override most medication alerts,
suggesting that current medication safety alerts may be inadequate
to protect patient safety.},
author = {Isaac, Thomas and Weissman, Joel S and Davis, Roger B and Massagli, Michael and Cyrulik, Adrienne and Sands, Daniel Z and Weingart, Saul N},
doi = {10.1001/archinternmed.2008.551},
institution = {Division of General Medicine and Primary Care, Beth Israel Deaconess Medical Center, 330 Brookline Ave., Boston, MA 02215, USA. tisaac@bidmc.harvard.edu},
journal = {Archives of Internal Medicine},
keywords = {80 and over; Ambulatory Care; Drug Hypersensitivi,Adolescent; Adult; Adverse Drug Reaction Reporting,Computer-Assisted; Female; Humans; Male; Medical,prevention /{\&}/ control; Drug Interactions; Drug T,prevention /{\&}/ control; Medicine,statistics /{\&}/ numerical data; Middle Aged; Physi,statistics /{\&}/ numerical data; Retrospective Stud},
month = {feb},
number = {3},
pages = {305--311},
pmid = {19204222},
title = {{Overrides of medication alerts in ambulatory care.}},
url = {http://dx.doi.org/10.1001/archinternmed.2008.551},
volume = {169},
year = {2009}
}
@inproceedings{guestrin2002multiagent,
author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald},
booktitle = {Advances in Neural Information Processing Systems 14},
pages = {1523--1530},
title = {{Multiagent Planning with Factored MDPs}},
year = {2002}
}
@article{ramachandran2007bayesian,
abstract = {Inverse Reinforcement Learning (IRL) is the problem of learning the reward function underlying a Markov Decision Process given the dynamics of the system and the behaviour of an expert. IRL is motivated by situations where knowledge of the rewards is a goal by itself (as in preference elicitation) and by the task of apprenticeship learning (learning policies from an expert). In this paper we show how to combine prior knowledge and evidence from the expert's actions to derive a probability distribution over the space of reward functions. We present efficient algorithms that find solutions for the reward learning and apprenticeship learning tasks that generalize well over these distributions. Experimental results show strong improvement for our methods over previous heuristic-based approaches.},
author = {Ramachandran, Deepak and Amir, Eyal},
editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ramachandran, Amir - 2007 - Bayesian Inverse Reinforcement Learning.pdf:pdf},
journal = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
keywords = {markov decision processes,reinforcement learning},
pages = {2586--2591},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Bayesian Inverse Reinforcement Learning}},
url = {http://www.aaai.org/Papers/IJCAI/2007/IJCAI07-416.pdf},
volume = {51},
year = {2007}
}
@article{manevitz2002one-class,
author = {Manevitz, Larry M and Yousef, Malik},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
pages = {139--154},
publisher = {JMLR.org},
title = {{One-class svms for document classification}},
volume = {2},
year = {2002}
}
@article{hunter2004tutorial,
author = {Hunter, D R and Lange, K},
journal = {The American Statistician},
number = {1},
pages = {30--37},
publisher = {ASA},
title = {{A tutorial on MM algorithms}},
volume = {58},
year = {2004}
}
@inproceedings{das2011submodular,
author = {Das, A and Kempe, D},
booktitle = {Proc. ICML},
title = {{Submodular meets spectral: Greedy algorithms for subset selection, sparse approximation and dictionary selection}},
year = {2011}
}
@inproceedings{sammut92learning,
author = {Sammut, Claude and Hurst, Scott and Kedzier, Dana and Michie, Donald},
booktitle = {Proceedings of the Ninth International Workshop on Machine Learning},
pages = {385--393},
title = {{Learning to Fly}},
year = {1992}
}
@inproceedings{LaMu09,
author = {Lazaric, Alessandro and Munos, R{\'{e}}mi},
booktitle = {Conference on Learning Theory},
title = {{Hybrid Stochastic-Adversarial On-line Learning}},
year = {2009}
}
@article{gnedenko1943distribution,
author = {Gnedenko, Boris},
journal = {The Annals of Mathematics},
number = {3},
pages = {423--453},
publisher = {JSTOR},
title = {{Sur la distribution limite du terme maximum d'une s{\'{e}}rie al{\'{e}}atoire}},
volume = {44},
year = {1943}
}
@inproceedings{campbell2000linear,
annote = {comps{\_}ano},
author = {Campbell, Colin and Bennett, Kristin P},
booktitle = {Advances in Neural Information Processing Systems 13, Papers from Neural Information Processing Systems (NIPS) 2000},
pages = {395--401},
title = {{A Linear Programming Approach to Novelty Detection}},
url = {http://books.nips.cc/papers/files/nips13/CampbellBennett.pdf},
year = {2000}
}
@techreport{alon2014nonstochastic,
abstract = {We present and study a partial-information model of online learning, where a decision maker repeatedly chooses from a finite set of actions, and observes some subset of the associated losses. This naturally models several situations where the losses of different actions are related, and knowing the loss of one action provides information on the loss of other actions. Moreover, it generalizes and interpolates between the well studied full-information setting (where all losses are revealed) and the bandit setting (where only the loss of the action chosen by the player is revealed). We provide several algorithms addressing different variants of our setting, and provide tight regret bounds depending on combinatorial properties of the information feedback structure.},
archivePrefix = {arXiv},
arxivId = {1409.8428},
author = {Alon, Noga and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mannor, Shie and Mansour, Yishay and Shamir, Ohad},
eprint = {1409.8428},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Alon et al. - 2014 - Nonstochastic Multi-Armed Bandits with Graph-Structured Feedback.pdf:pdf},
title = {{Nonstochastic multi-armed bandits with graph-structured feedback}},
year = {2014}
}
@article{dunn1978conditional,
author = {Dunn, J C and Harshbarger, S},
journal = {Journal of Mathematical Analysis and Applications},
number = {2},
pages = {432--444},
publisher = {Elsevier},
title = {{Conditional gradient algorithms with open loop step size rules}},
volume = {62},
year = {1978}
}
@techreport{williams1993tight,
author = {Williams, Ronald and {Baird III}, Leemon},
institution = {Northeastern University},
number = {NU-CCS-93-14},
title = {{Tight Performance Bounds on Greedy Policies Based on Imperfect Value Functions}},
year = {1993}
}
@misc{leskovec2014snapnets,
author = {Leskovec, Jure and Krevl, Andrej},
howpublished = {\url{http://snap.stanford.edu/data}},
month = {jun},
title = {{SNAP datasets: Stanford large network dataset collection}},
year = {2014}
}
@inproceedings{florez-larrahondo2005efficient,
abstract = {Anomaly detection systems are developed by learning a baseline-model
from a set of events captured from a computer system operating under
normal conditions. The model is then used to recognize unusual activities
as deviations from normality. Hidden Markov models (HMMs) are powerful
probabilistic finite state machines that have been used to acquire
these baseline-models. Although previous research has indicated that
HMMs can effectively represent complex sequences, the traditional
learning algorithm for HMMs is too computationally expensive for
use with real-world anomaly detection systems. This paper describes
the use of a novel incremental learning algorithm for HMMs that allows
the efficient acquisition of anomaly detection models. The new learning
algorithm requires less memory and training time than previous approaches
for learning discrete HMMs and can be used to perform online learning
of accurate baseline-models from complex computer applications to
support anomaly detection.},
annote = {comps{\_}anX},
author = {Florez-Larrahondo, German and Bridges, Susan M and Vaughn, Rayford},
booktitle = {Information Security},
doi = {10.1007/11556992_38},
isbn = {978-3-540-29001-8},
issn = {0302-9743 (Print) 1611-3349 (Online)},
pages = {506--514},
publisher = {Springer Berlin / Heidelberg},
series = {Lecture Notes in Computer Science},
title = {{Efficient Modeling of Discrete Events for Anomaly Detection Using Hidden Markov Models}},
url = {http://www.springerlink.com/content/eqdqtr9hwfyxwg3k/},
volume = {3650/2005},
year = {2005}
}
@article{koivisto2004exact,
annote = {comps{\_}models},
author = {Koivisto, M and Sood, K},
journal = {Journal of Machine Learning Research},
pages = {549--573},
title = {{Exact Bayesian Structure Discovery in Bayesian Networks}},
url = {http://citeseer.ist.psu.edu/article/koivisto04exact.html},
volume = {5},
year = {2004}
}
@article{fisher1936use,
author = {Fisher, R A},
journal = {Annals of Eugenics},
keywords = {classic classification linear-classification linea},
number = {7},
pages = {179--188},
title = {{The Use of Multiple Measurements in Taxonomic Problems}},
volume = {7},
year = {1936}
}
@inproceedings{yuan2004annealed,
author = {Yuan, Changhe and Lu, Tsai-Ching and Druzdzel, Marek},
booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence},
pages = {628--635},
title = {{Annealed MAP}},
year = {2004}
}
@inproceedings{yu09Modulated,
author = {Yu, Jia Yuan and Mannor, Shie},
booktitle = {Joint 48th IEEE Conference on Decision and Control and 28th Chinese Control Conference},
pages = {2946--2953},
publisher = {IEEE Press},
title = {{Arbitrarily modulated Markov decision processes}},
year = {2009}
}
@article{pwc2009,
author = {{PricewaterhouseCoopers (PWC)}},
journal = {Tech. Report},
title = {{Global city GDP rankings 2008--2025}},
year = {2009}
}
@article{AgHeTe88:Switching,
author = {Agrawal, R and Hedge, M V and Teneketzis, D},
journal = {IEEE Transactions on Automatic Control},
number = {10},
pages = {899--906},
title = {{Asymptotically Efficient Adaptive Allocation Rules for the Multiarmed Bandit Problem with Switching Cost}},
volume = {33},
year = {1988}
}
@inproceedings{jin2001mining,
address = {New York, NY, USA},
author = {Jin, Wen and Tung, Anthony K H and Han, Jiawei},
booktitle = {KDD '01: Proceedings of the seventh ACM SIGKDD international conference on Knowledge discovery and data mining},
doi = {http://doi.acm.org/10.1145/502512.502554},
isbn = {1-58113-391-X},
pages = {293--298},
publisher = {ACM Press},
title = {{Mining top-n local outliers in large databases}},
year = {2001}
}
@incollection{JN11b,
author = {Juditsky, A and Nemirovski, A},
booktitle = {Optimization for Machine Learning},
editor = {Sra, S and Nowozin, S and Wright, S},
pages = {149--183},
publisher = {MIT press},
title = {{First-Order Methods for Nonsmooth Convex Large-Scale Optimization, II: Utilizing Problem's Structure}},
year = {2011}
}
@article{tsybakov1998pointwise,
abstract = {The problem of nonparametric function estimation in the Gaussian white noise model is considered. It is assumed that the unknown function belongs to one of the Sobolev classes, with an unknown regularity parameter. Asymptotically exact adaptive estimators of functions are proposed on the scale of Sobolev classes, with respect to pointwise and sup-norm risks. It is shown that, unlike the case of L2-risk, a loss of efficiency under adaptation is inevitable here. Bounds on the value of the loss of efficiency are obtained.},
author = {Tsybakov, A. B.},
journal = {Annals of Statistics},
keywords = {Adaptive nonparametric estimation,Exact constants,Gaussian white noise,Loss of efficiency under adaptation,Minimax risk,Sobolev class},
number = {6},
pages = {2420--2469},
title = {{Pointwise and sup-norm sharp adaptive estimation of functions on the Sobolev classes}},
volume = {26},
year = {1998}
}
@inproceedings{ding2013multi-armed,
abstract = {We study the multi-armed bandit problems with budget constraint and variable costs (MAB-BV). In this setting, pulling an arm will receive a random reward together with a random cost, and the objective of an algorithm is to pull a sequence of arms in order to maximize the expected total reward with the costs of pulling those arms complying with a budget constraint. This new setting models many Internet applications (e.g., ad exchange, sponsored search, and cloud computing) in a more accurate manner than previous settings where the pulling of arms is either costless or with a fixed cost.We propose two UCB based algorithms for the new setting. The first algorithm needs prior knowledge about the lower bound of the expected costs when computing the exploration term. The second algorithm eliminates this need by estimating the minimal expected costs from empirical observations, and therefore can be applied to more real-world applications where pri- or knowledge is not available.We prove that both algorithms have nice learning abilities, with regret bounds of O(lnB). Furthermore, we show that when applying our proposed algorithms to a previous setting with fixed costs (which can be regarded as our special case), one can improve the previously obtained regret bound. Our simulation results on real-time bidding in ad exchange verify the effectiveness of the algorithms and are consistent with our theoretical analysis},
author = {Ding, Wenkui and Qin, Tao and Zhang, Xu-dong and Liu, Tie-yan},
booktitle = {Proceedings of the Twenty-Seventh AAAI Conference on Artificial Intelligence},
isbn = {9781577356158},
title = {{Multi-Armed Bandit with Budget Constraint and Variable Costs}},
url = {http://dblp.uni-trier.de/db/conf/aaai/aaai2013.html{\#}DingQZL13},
year = {2013}
}
@techreport{vershynin_note_2009,
author = {Vershynin, Roman},
title = {{A note on sums of independent random matrices after Ahlswede-Winter}},
url = {http://www.umich.edu/{~}romanv/teaching/reading-group/ahlswede-winter.pdf},
year = {2009}
}
@inproceedings{maes07sequencelabeling,
author = {Maes, Francis and Denoyer, Ludovic and Gallinari, Patrick},
pages = {648--657},
booktitle = {Proceedings of the 18th European Conference on Machine Learning},
title = {{Sequence Labeling with Reinforcement Learning and Ranking Algorithms}},
year = {2007}
}
@inproceedings{shapinglevelsets,
author = {Bach, F},
booktitle = {Adv. NIPS},
title = {{Shaping Level Sets with Submodular Functions}},
year = {2011}
}
@article{mairal2010online,
author = {Mairal, J and Bach, F and Ponce, J and Sapiro, G},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
pages = {19--60},
publisher = {MIT Press},
title = {{Online learning for matrix factorization and sparse coding}},
volume = {11},
year = {2010}
}
@article{pazzani2007content,
abstract = {This chapter discusses content-based recommendation systems, i.e., systems that recommend an item to a user based upon a description of the item and a profile of the users interests. Content-based recommendation systems may be used in a variety of domains ranging from recommending web pages, news articles, restaurants, television programs, and items for sale. Although the details of various systems differ, content-based recommendation systems share in common a means for describing the items that may be recommended, a means for creating a profile of the user that describes the types of items the user likes, and a means of comparing items to the user profile to determine what to re commend. The profile is often created and updated automatically in response to feedback on the desirability of items that have been presented to the user.},
author = {Pazzani, Michael J and Billsus, Daniel},
journal = {The adaptive web},
keywords = {review article},
mendeley-tags = {review article},
title = {{Content-Based Recommendation Systems}},
year = {2007}
}
@inproceedings{boutilier1995exploiting,
author = {Boutilier, Craig and Dearden, Richard and Goldszmidt, Mois{\'{e}}s},
booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence},
pages = {1104--1111},
title = {{Exploiting Structure in Policy Construction}},
year = {1995}
}
@article{iwata2009simple,
abstract = {This paper presents a new simple algorithm for minimizing submodular functions. For integer valued submodular functions, the algorithm runs in O(n6EO log nM) time, where n is the cardinality of the ground set, M is the maximum absolute value of the function value, and EO is the time for function evaluation. The algorithm can be improved to run in O ((n4EO+n5)log nM) time. The strongly polynomial version of this faster algorithm runs in O((n5EO + n6) log n) time for real valued general submodular functions. These are comparable to the best known running time bounds for submodular function minimization. The algorithm can also be implemented in strongly polynomial time using only additions, subtractions, comparisons, and the oracle calls for function evaluation. This is the first fully combinatorial submodular function minimization algorithm that does not rely on the scaling method.},
author = {Iwata, Satoru and Orlin, James B},
journal = {Proceedings of the 20th Annual ACM-SIAM Symposium on Discrete Algorithms},
pages = {1230--1237},
publisher = {Society for Industrial and Applied Mathematics},
title = {{A simple combinatorial algorithm for submodular function minimization}},
url = {http://portal.acm.org/citation.cfm?id=1496770.1496903},
year = {2009}
}
@article{Seldin2011,
abstract = {We derive an instantaneous (per-round) data-dependent regret bound for stochastic multiarmed bandits with side information (also known as contextual bandits). The scaling of our regret bound with the number of states (contexts) N goes as sqrtN Irhot(S;A), where Irhot(S;A) is the mutual information between states and actions (the side information) used by the algorithm at round t. If the algorithm uses all the side information, the regret bound scales as sqrtN ln K, where K is the number of actions (arms). However, if the side information Irhot(S;A) is not fully used, the regret bound is significantly tighter. In the extreme case, when Irhot(S;A) = 0, the dependence on the number of states reduces from linear to logarithmic. Our analysis allows to provide the algorithm large amount of side information, let the algorithm to decide which side information is relevant for the task, and penalize the algorithm only for the side information that it is using de facto. We also present an algorithm for multiarmed bandits with side information with O(K) computational complexity per game round.},
author = {Seldin, Yevgeny and Auer, Peter and Laviolette, Fran{\c{c}}ois and Shawe-Taylor, John and Ortner, Ronald},
editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q},
journal = {Computer},
keywords = {complacs,computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms},
pages = {1--9},
title = {{PAC-Bayesian Analysis of Contextual Bandits}},
url = {http://eprints.pascal-network.org/archive/00008826/},
year = {2011}
}
@article{robbins1952some,
author = {Robbins, Herbert},
journal = {Bulletin of the American Mathematical Society},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {527--535},
title = {{Some aspects of the sequential design of experiments}},
volume = {58},
year = {1952}
}
@article{walsh2010integrating,
abstract = {Recent advancements in model-based reinforcement learn- ing have shown that the dynamics of many structured do- mains (e.g. DBNs) can be learned with tractable sample com- plexity, despite their exponentially large state spaces. Un- fortunately, these algorithms all require access to a planner that computes a near optimal policy, and while many tra- ditional MDP algorithms make this guarantee, their com- putation time grows with the number of states. We show how to replace these over-matched planners with a class of sample-based plannerswhose computation time is indepen- dent of the number of stateswithout sacrificing the sample- efficiency guarantees of the overall learning algorithms. To do so, we define sufficient criteria for a sample-based planner to be used in such a learning system and analyze two popu- lar sample-based approaches from the literature. We also in- troduce our own sample-based planner, which combines the strategies fromthese algorithms and stillmeets the criteria for integration into our learning system. In doing so, we define the first complete RL solution for compactly represented (ex- ponentially sized) state spaces with efficiently learnable dy- namics that is both sample efficient and whose computation time does not grow rapidly with the number of states.},
author = {Walsh, Thomas J and Goschin, Sergiu and Littman, Michael L},
journal = {AAAI Conference on Artificial Intelligence},
title = {{Integrating sample-based planning and model-based reinforcement learning}},
year = {2010}
}
@article{jaynes57maxent,
author = {Jaynes, E T},
journal = {Physical Review},
number = {4},
pages = {620--630},
publisher = {American Physical Society},
title = {{Information Theory and Statistical Mechanics}},
volume = {106},
year = {1957}
}
@inproceedings{2004advances,
booktitle = {NIPS},
title = {{Advances in Neural Information Processing Systems 17 [Neural Information Processing Systems, NIPS 2004, December 13-18, 2004, Vancouver, British Columbia, Canada]}},
year = {2004}
}
@book{koller2009bayesian,
annote = {comps{\_}models},
author = {Koller, Daphne and Friedman, Nir},
keywords = {bibtex-import},
publisher = {MIT Press},
title = {{Probabilistic Graphical Models: Principles and Techniques}},
year = {2009}
}
@inproceedings{hertz2006learning,
address = {New York, NY, USA},
annote = {comps{\_}distance},
author = {Hertz, Tomer and Hillel, Aharon Bar and Weinshall, Daphna},
booktitle = {ICML '06: Proceedings of the 23rd international conference on Machine learning},
doi = {http://doi.acm.org/10.1145/1143844.1143895},
isbn = {1-59593-383-2},
pages = {401--408},
publisher = {ACM},
title = {{Learning a kernel function for classification with small training samples}},
year = {2006}
}
@inproceedings{joshi2005investigating,
address = {New York, NY, USA},
annote = {comps{\_}anX},
author = {Joshi, Shrijit S and Phoha, Vir V},
booktitle = {ACM-SE 43: Proceedings of the 43rd annual Southeast regional conference},
doi = {http://doi.acm.org/10.1145/1167350.1167387},
isbn = {1-59593-059-0},
pages = {98--103},
publisher = {ACM},
title = {{Investigating hidden Markov models capabilities in anomaly detection}},
year = {2005}
}
@article{bousquet2002stability,
author = {Bousquet, Olivier and Elisseeff, Andre},
journal = {Journal of Machine Learning Research},
pages = {499--526},
title = {{Stability and Generalization}},
volume = {2},
year = {2002}
}
@article{blei2003latent,
address = {Cambridge, MA, USA},
annote = {comps{\_}models},
author = {Blei, David M and Ng, Andrew Y and Jordan, Michael I},
issn = {1533-7928},
journal = {Journal of Machine Learning Research},
pages = {993--1022},
publisher = {MIT Press},
title = {{Latent dirichlet allocation}},
url = {http://delivery.acm.org/10.1145/950000/944937/3-993-blei.pdf?key1=944937{\&}key2=6322955221{\&}coll=GUIDE{\&}dl=GUIDE{\&}CFID=8754681{\&}CFTOKEN=61191808},
volume = {3},
year = {2003}
}
@book{pinter1995global,
author = {Pint{\'{e}}r, J{\'{a}}nos},
isbn = {9780792337577},
publisher = {Springer},
series = {Nonconvex Optimization and Its Applications},
title = {{Global Optimization in Action: Continuous and Lipschitz Optimization: Algorithms, Implementations and Applications}},
url = {http://books.google.fr/books?id=G8pF982ckNsC},
year = {1995}
}
@inproceedings{geman02dynamic,
address = {Morristown, NJ, USA},
author = {Geman, Stuart and Johnson, Mark},
booktitle = {ACL '02: Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
doi = {http://dx.doi.org/10.3115/1073083.1073130},
pages = {279--286},
publisher = {Association for Computational Linguistics},
title = {{Dynamic programming for parsing and estimation of stochastic unification-based grammars}},
year = {2002}
}
@article{wolsey,
author = {Wolsey, Laurence A},
journal = {Mathematics of Operations Research},
number = {3},
pages = {410--425},
publisher = {INFORMS},
title = {{Maximising Real-Valued Submodular Functions: Primal and Dual Heuristics for Location Problems}},
volume = {7},
year = {1982}
}
@article{Fre75,
author = {Freedman, D},
journal = {The Annals of Probability},
pages = {100--118},
title = {{On tail probabilities for martingales}},
volume = {3},
year = {1975}
}
@inproceedings{konda2000actor-critic,
author = {Konda, Vijay and Tsitsiklis, John},
booktitle = {Advances in Neural Information Processing Systems 12},
pages = {1008--1014},
title = {{Actor-Critic Algorithms}},
year = {2000}
}
@inproceedings{Cevher2008,
author = {Cevher, V and Duarte, M F and Hegde, C and Baraniuk, R G},
booktitle = {Adv. NIPS},
title = {{Sparse signal recovery using Markov random fields}},
year = {2008}
}
@inproceedings{shalev07pegasos,
author = {Shalev-Shwartz, Shai and Singer, Yoram and Srebro, Nathan},
doi = {http://doi.acm.org/10.1145/1273496.1273598},
pages = {807--814},
title = {{Pegasos: Primal Estimated sub-GrAdient SOlver for SVM}},
year = {2007}
}
@inproceedings{neu12o-mdp-full,
annote = {From Duplicate 1 (Online {\{}M{\}}arkov Decision Processes under Bandit Feedback - Neu, G; Gy{\"{o}}rgy, A; Szepesv{\'{a}}ri, $\backslash$textCs; Antos, A)
Accepted for publication},
author = {Neu, G and Gy{\"{o}}rgy, A and Szepesv{\'{a}}ri, Csaba and Antos, A},
booktitle = {IEEE Transactions on Automatic Control},
pages = {1804--1812},
title = {{Online Markov Decision Processes under Bandit Feedback}},
year = {2013}
}
@article{thompson1933likelihood,
author = {Thompson, William R.},
journal = {Biometrika},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {285--294},
title = {{On the likelihood that one unknown probability exceeds another in view of the evidence of two samples}},
volume = {25},
year = {1933}
}
@article{cooper1992bayesian,
abstract = {This paper presents a Bayesian method for constructing probabilistic
networks from databases. In particular, we focus on constructing
Bayesian belief networks. Potential applications include computer-assisted
hypothesis testing, automated scientific discovery, and automated
construction of probabilistic expert systems. We extend the basic
method to handle missing data and hidden (latent) variables. We show
how to perform probabilistic inference by averaging over the inferences
of multiple belief networks. Results are presented of a preliminary
evaluation of an algorithm for constructing a belief network from
a database of cases. Finally, we relate the methods in this paper
to previous work, and we discuss open problems.},
author = {Cooper, G F and Herskovits, E},
issn = {0885-6125},
journal = {Machine Learning},
keywords = {algorithm,bayesian,induction,learning,network,structure},
month = {oct},
number = {4},
pages = {309--347},
title = {{A Bayesian Method for the Induction of Probabilistic Networks from Data}},
url = {http://www.ingentaconnect.com/content/klu/mach/1992/00000009/00000004/00422779},
volume = {9},
year = {1992}
}
@article{spielman2011graph,
author = {Spielman, Daniel A and Srivastava, Nikhil},
journal = {SIAM Journal on Computing},
number = {6},
pages = {1913--1926},
title = {{Graph sparsification by effective resistances}},
volume = {40},
year = {2011}
}
@inproceedings{kivinen2002online,
address = {Cambridge, MA},
author = {Kivinen, J and Smola, A J and Williamson, R C},
booktitle = {Advances in Neural Information Processing Systems 14},
editor = {Dietterich, T G and Becker, S and Ghahramani, Z},
publisher = {MIT Press},
title = {{Online Learning with Kernels}},
year = {2002}
}
@inproceedings{grabner2008semi-supervised,
author = {Grabner, Helmut and Leistner, Christian and Bischof, Horst},
booktitle = {Proceedings of the 10th European Conference on Computer Vision},
pages = {234--247},
title = {{Semi-Supervised On-Line Boosting for Robust Tracking}},
year = {2008}
}
@inproceedings{sprechmann2010collaborative,
author = {Sprechmann, P and Ramirez, I and Sapiro, G and Eldar, Y},
booktitle = {Conf. Information Sciences and Systems (CISS)},
title = {{Collaborative hierarchical sparse modeling}},
year = {2010}
}
@article{Langford2007,
abstract = {We present Epoch-Greedy, an algorithm for contextual multi-armed bandits (also known as bandits with side information). Epoch-Greedy has the following properties: 1. No knowledge of a time horizon T is necessary. 2. The regret incurred by Epoch-Greedy is controlled by a sample complexity bound for a hypothesis class. 3. The regret scales as O(T 2/3 S 1/3) or better (sometimes, much better). Here S is the complexity term in a sample complexity bound for standard supervised learning. 1},
author = {Langford, John and Zhang, Tong},
editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S},
journal = {Advances in Neural Information Processing Systems},
pages = {1--8},
publisher = {Citeseer},
title = {{The Epoch-Greedy Algorithm for Contextual Multi-armed Bandits}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.143.8000{\&}rep=rep1{\&}type=pdf},
volume = {20},
year = {2007}
}
@article{russo2014learning,
author = {Russo, Daniel and {Van Roy}, Benjamin},
journal = {Mathematics of Operations Research},
title = {{Learning to Optimize Via Posterior Sampling}},
year = {2014}
}
@article{ripeanu2002mapping,
abstract = {We studied the topology and protocols of the public Gnutella network. Its substantial user base and open architecture make it a good large-scale, if uncontrolled, testbed. We captured the network's topology, generated traffic, and dynamic behavior to determine its connectivity structure and how well (if at all) Gnutella's overlay network topology maps to the physical Internet infrastructure. Our analysis of the network allowed us to evaluate costs and benefits of the peer-to-peer (P2P) approach and to investigate possible improvements that would allow better scaling and increased reliability in Gnutella and similar networks. A mismatch between Gnutella's overlay network topology and the Internet infrastructure has critical performance implications},
author = {Ripeanu, Matei and Iamnitchi, Adriana and Foster, Ian},
doi = {10.1109/4236.978369},
issn = {10897801},
journal = {IEEE Internet Computing},
number = {1},
pages = {50--57},
title = {{Mapping the Gnutella network}},
volume = {6},
year = {2002}
}
@incollection{joachims1999making,
abstract = {Training a support vector machine (SVM) leads to a quadratic optimization problem with bound constraints and one linear equality constraint. Despite the fact that this type of problem is well understood, there are many issues to be considered in designing an SVM learner. In particular, for large learning tasks with many training examples, o -the-shelf optimization techniques for general quadratic programs quickly become intractable in their memory and time requirements. S V Mlight1 is an implementation of an SVM learner which addresses the problem of large tasks. This chapter presents algorithmic and computational results developed for S V MlightV2.0, which make large-scale SVM training more practical. The results give guidelines for the application of SVMs to large domains},
author = {Joachims, Thorsten},
booktitle = {Advances in Kernel Methods: Support Vector Learning},
chapter = {11},
editor = {Sch{\"{o}}lkopf, B and Burges, C and Smola, A},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Joachims - 1999 - Making large-Scale SVM Learning Practical.pdf:pdf},
pages = {169--184},
publisher = {MIT Press},
title = {{Making Large-Scale SVM Learning Practical}},
url = {http://www-ai.cs.uni-dortmund.de/DOKUMENTE/joachims{\_}99a.ps.gz},
year = {1999}
}
@incollection{fergus2009semi-supervised,
author = {Fergus, Rob and Weiss, Yair and Torralba, Antonio},
booktitle = {Advances in Neural Information Processing Systems 22},
editor = {Bengio, Y and Schuurmans, D and Lafferty, J and Williams, C K I and Culotta, A},
pages = {522--530},
publisher = {NIPS Foundation (http://books.nips.cc)},
title = {{Semi-Supervised Learning in Gigantic Image Collections}},
year = {2009}
}
@phdthesis{guestrin2003planning,
author = {Guestrin, Carlos},
school = {Stanford University},
title = {{Planning Under Uncertainty in Complex Structured Environments}},
year = {2003}
}
@article{cap,
author = {Zhao, P and Rocha, G and Yu, B},
journal = {Annals of Statistics},
number = {6A},
pages = {3468--3497},
title = {{Grouped and hierarchical model selection through composite absolute penalties}},
volume = {37},
year = {2009}
}
@article{amari,
author = {Amari, S},
journal = {Neural Computation},
number = {2},
pages = {251--276},
title = {{Natural Gradient Works Efficiently in Learning}},
volume = {10},
year = {1998}
}
@inproceedings{gao2006novel,
annote = {comps{\_}anX},
author = {Gao, Jing and Cheng, Haibin and Tan, Pang-Ning},
booktitle = {SDM},
title = {{A Novel Framework for Incorporating Labeled Examples into Anomaly Detection.}},
url = {http://www.siam.org/meetings/sdm06/proceedings/068gaoj2.pdf},
year = {2006}
}
@article{bala1998learning,
abstract = {When payoffs from different actions are unknown, agents use their own past experience as well as the experience of their neighbours to guide their decision making. In this paper, we develop a general framework to study the relationship between the structure of these neighbourhoods and the process of social learning. We show that, in a connected society, local learning ensures that all agents obtain the same payoffs in the long run. Thus, if actions have different payoffs, then all agents choose the same action, and social conformism obtains. We develop conditions on the distribution of prior beliefs, the structure of neighbourhoods and the informativeness of actions under which this action is optimal. In particular, we identify a property of neighbourhood structures-local independence-which greatly facilitates social learning. Simulations of the model generate spatial and temporal patterns of adoption that are consistent with empirical work.},
author = {Bala, Venkatesh and Goyal, Sanjeev},
journal = {Review of Economic Studies},
number = {3},
pages = {595--621},
title = {{Learning from neighbours}},
volume = {65},
year = {1998}
}
@inproceedings{papadimitriou2003cross-outlier,
author = {Papadimitriou, Spiros and Faloutsos, Christos},
booktitle = {Advances in Spatial and Temporal Databases, 8th International Symposium, SSTD 2003, Santorini Island, Greece, July 24-27, 2003, Proceedings},
editor = {Hadzilacos, Thanasis and Manolopoulos, Yannis and Roddick, John F and Theodoridis, Yannis},
pages = {199--213},
title = {{Cross-Outlier Detection}},
volume = {2750},
year = {2003}
}
@inproceedings{allenberg2006hannan,
author = {Allenberg, Chamy and Auer, Peter and Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Ottucs{\'{a}}k, Gy{\"{o}}rgy},
booktitle = {Algorithmic Learning Theory},
title = {{Hannan consistency in on-line learning in case of unbounded losses under partial monitoring}},
year = {2006}
}
@article{coulom2007efficient,
abstract = {A Monte-Carlo evaluation consists in estimating a position by averaging the outcome of several random continuations. The method can serve as an evaluation function at the leaves of a min-max tree. This paper presents a new framework to combine tree search with Monte-Carlo evaluation, that does not separate between a min-max phase and a Monte-Carlo phase. Instead of backing-up the min-max value close to the root, and the average value at some depth, a more general backup operator is defined that progressively changes from averaging to min-max as the number of simulations grows. This approach provides a fine-grained control of the tree growth, at the level of individual simulations, and allows efficient selectivity. The resulting algorithm was implemented in a 9x9 Go-playing program, Crazy Stone, that won the 10th KGS computer-Go tournament.},
author = {Coulom, R{\'{e}}mi},
journal = {Computers and games},
pages = {72--83},
title = {{Efficient selectivity and backup operators in Monte-Carlo tree search}},
volume = {4630},
year = {2007}
}
@inproceedings{ratliff2007boosting,
address = {Cambridge, MA},
author = {Ratliff, Nathan and Bradley, David and Bagnell, J Andrew (Drew) and Chestnutt, Joel},
booktitle = {Advances in Neural Information Processing Systems 19},
editor = {Sch{\"{o}}lkopf, B and Platt, J C and Hofmann, T},
publisher = {MIT Press},
title = {{Boosting Structured Prediction for Imitation Learning}},
url = {http://www-clmc.usc.edu/publications/B/bagnell-NIPS2006.pdf},
year = {2007}
}
@inproceedings{stobbe,
author = {Stobbe, P and Krause, A},
booktitle = {Adv. NIPS},
title = {{Efficient Minimization of Decomposable Submodular Functions}},
year = {2010}
}
@inproceedings{LaurentGuillaumeGroupLasso,
author = {Jacob, L and Obozinski, G and Vert, J.-P.},
booktitle = {Proc. ICML},
title = {{Group Lasso with Overlaps and Graph Lasso}},
year = {2009}
}
@book{jannach2010recommender,
author = {Jannach, Dietmar and Zanker, Markus and Felfernig, Alexander and Friedrich, Gerhard},
publisher = {Cambridge University Press},
title = {{Recommender systems: An introduction}},
year = {2010}
}
@inproceedings{marecki2007fast,
author = {Marecki, Janusz and Koenig, Sven and Tambe, Milind},
booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
title = {{A Fast Analytical Algorithm for Solving Markov Decision Processes with Continuous Resources}},
year = {2007}
}
@book{cormen89introduction,
author = {Cormen, T H and Leiserson, C E and Rivest, R L},
publisher = {MIT Press},
title = {{Introduction to Algorithms}},
year = {1989}
}
@inproceedings{dvijotham2010inverse,
abstract = {We present new algorithms for inverse optimal control (or inverse$\backslash$nreinforcement learning, IRL) within the framework of linearlysolvable$\backslash$nMDPs (LMDPs). Unlike most prior IRL algorithms which recover only$\backslash$nthe control policy of the expert, we recover the policy, the value$\backslash$nfunction and the cost function. This is possible because here the$\backslash$ncost and value functions are uniquely deffined given the policy.$\backslash$nDespite these special properties, we can handle a wide variety of$\backslash$nproblems such as the grid worlds popular in RL and most of the nonlinear$\backslash$nproblems arising in robotics and control engineering. Direct comparisons$\backslash$nto prior IRL algorithms show that our new algorithms provide more$\backslash$ninformation and are orders of magnitude faster. Indeed our fastest$\backslash$nalgorithm is the first inverse algorithm which does not require solving$\backslash$nthe forward problem; instead it performs unconstrained optimization$\backslash$nof a convex and easy-to-compute log-likelihood. Our work also sheds$\backslash$nlight on the recent Maximum Entropy (MaxEntIRL) algorithm, which$\backslash$nwas dened in terms of density estimation and the corresponding forward$\backslash$nproblem was left unspecified. We show that MaxEntIRL is inverting$\backslash$nan LMDP, using the less efficient of the algorithms derived here.$\backslash$nUnlike all prior IRL algorithms which assume pre-existing features,$\backslash$nwe study feature adaptation and show that such adaptation is essential$\backslash$nin continuous state spaces.},
author = {Dvijotham, Krishnamurthy and Todorov, Emanuel},
booktitle = {ICML},
keywords = {dblp},
pages = {335--342},
title = {{Inverse Optimal Control with Linearly-Solvable MDPs.}},
year = {2010}
}
@inproceedings{mary2015bandits,
abstract = {This paper addresses the on-line recommendation problem facing new users and new items; we assume that no information is available neither about users, nor about the items. The only source of information is a set of ratings given by users to some items. By on-line, we mean that the set of users, and the set of items, and the set of ratings is evolving along time and that at any moment, the recommendation system has to select items to recommend based on the currently available information, that is basically the sequence of past events. We also mean that each user comes with her preferences which may evolve along short and longer scales of time; so we have to continuously update their preferences. When the set of ratings is the only available source of information , the traditional approach is matrix factorization. In a decision making under uncertainty setting, actions should be selected to balance exploration with exploitation; this is best modeled as a bandit problem. Matrix factors provide a latent representation of users and items. These representations may then be used as contextual information by the bandit algorithm to select items. This last point is exactly the originality of this paper: the combination of matrix factorization and bandit algorithms to solve the on-line recommendation problem. Our work is driven by considering the recommendation problem as a feedback controlled loop. This leads to interactions between the representation learning, and the recommendation policy.},
author = {Mary, J{\'{e}}r{\'{e}}mie and Gaudel, Romaric and Preux, Philippe},
booktitle = {First International Workshop on Machine Learning, Optimization, and Big Data},
keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,contextual Bandits,sequential Recommender Systems},
title = {{Bandits and recommender systems}},
year = {2015}
}
@inproceedings{li2005lazy,
author = {Li, Lihong and Littman, Michael},
booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence},
pages = {1175--1180},
title = {{Lazy Approximation for Solving Continuous Finite-Horizon MDPs}},
year = {2005}
}
@book{shor85minimization,
address = {New York, NY, USA},
author = {Shor, N Z and Kiwiel, K C and Ruszczynski, A},
isbn = {0-387-12763-1},
publisher = {Springer-Verlag New York, Inc.},
title = {{Minimization methods for non-differentiable functions}},
year = {1985}
}
@inproceedings{Venkataraman-2004-Mislabeled,
author = {Venkataraman, S and Fradkin, D and Metaxas, D and Kulikowski, C},
booktitle = {16th IEEE International Conference on Tools with Artificial Intelligence},
pages = {356--361},
title = {{Distinguishing Mislabeled Data from Correctly Labeled Data in Classifier Design.}},
year = {2004}
}
@inproceedings{wong2003bayesian,
annote = {comps{\_}ano},
author = {Wong, Weng Keen and Moore, Andrew and Cooper, Gregory and Wagner, Michael},
booktitle = {Proceedings of the 20th International Conference on Machine Learning (ICML-2003)},
title = {{Bayesian Network Anomaly Pattern Detection for Disease Outbreaks}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.5.1245},
year = {2003}
}
@inproceedings{Bubeck2013a,
archivePrefix = {arXiv},
arxivId = {1205.3181},
author = {Bubeck, S{\'{e}}bastien and Wang, Tengyao and Viswanathan, Nitin},
booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML)},
eprint = {1205.3181},
title = {{Multiple identifications in multi-armed bandits}},
volume = {28},
year = {2013}
}
@article{johnson74approximation,
author = {Johnson, David},
journal = {Journal of Computer and System Sciences},
number = {3},
pages = {256--278},
title = {{Approximation Algorithms for Combinatorial Problems}},
volume = {9},
year = {1974}
}
@article{breunig2000lof:,
address = {New York, NY, USA},
author = {Breunig, Markus M and Kriegel, Hans-Peter and Ng, Raymond T and Sander, J{\"{o}}rg},
doi = {10.1145/335191.335388},
issn = {0163-5808},
journal = {SIGMOD Rec.},
number = {2},
pages = {93--104},
publisher = {ACM},
title = {{LOF: identifying density-based local outliers}},
volume = {29},
year = {2000}
}
@inproceedings{aggarwal2001outlier,
address = {New York, NY, USA},
annote = {comps{\_}ano},
author = {Aggarwal, Charu C and Yu, Philip S},
booktitle = {SIGMOD '01: Proceedings of the 2001 ACM SIGMOD international conference on Management of data},
doi = {10.1145/375663.375668},
isbn = {1-58113-332-4},
pages = {37--46},
publisher = {ACM},
title = {{Outlier detection for high dimensional data}},
year = {2001}
}
@techreport{krause2005note,
author = {Krause, Andreas and Guestrin, Carlos},
booktitle = {Technical Rep No CMUCALD},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Krause, Guestrin - 2005 - A Note on the Budgeted Maximization of Submodular Functions.pdf:pdf},
institution = {CMU},
keywords = {constraints,entropy maximization,optimization,submodular functions},
number = {June},
pages = {1--7},
publisher = {Citeseer},
title = {{A Note on the Budgeted Maximization of Submodular Functions}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.91.9721{\&}rep=rep1{\&}type=pdf},
volume = {5},
year = {2005}
}
@article{tsitsiklis1997analysis,
author = {Tsitsiklis, John and {Van Roy}, Benjamin},
journal = {IEEE Transactions on Automatic Control},
number = {5},
pages = {674--690},
title = {{An Analysis of Temporal-Difference Learning with Function Approximation}},
volume = {42},
year = {1997}
}
@article{gyorgy06adaptive,
address = {Oxford, UK},
author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Ottucs{\'{a}}k, Gy{\"{o}}rgy},
doi = {10.1093/comjnl/bxh168},
issn = {0010-4620},
journal = {Computer Journal},
keywords = {adaptive routing,machine learning,network routing},
number = {2},
pages = {180--189},
publisher = {Oxford University Press},
title = {{Adaptive Routing Using Expert Advice}},
url = {http://dx.doi.org/10.1093/comjnl/bxh168},
volume = {49},
year = {2006}
}
@inproceedings{gopalan2013thompson,
abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.},
author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay},
booktitle = {International Conference on Machine Learning},
title = {{Thompson sampling for complex bandit problems}},
year = {2014}
}
@article{mitra2002density-based,
author = {Mitra, P and Murthy, C A and Pal, S K},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {6},
pages = {1--14},
title = {{Density-based multiscale data condensation}},
volume = {24},
year = {2002}
}
@unpublished{jordan2003introduction,
annote = {A textbook for a probabilistic graphical models class
comps{\_}models},
author = {Jordan, Michael I},
keywords = {bibtex-import},
title = {{An Introduction to Probabilistic Graphical Models}},
year = {2003}
}
@article{metropolis1949monte,
abstract = {Abstract We shall present here the motivation and a general description of a method dealing with a class of problems in mathematical physics. The method is, essentially, a statistical approach to the study of differential equations, or more generally, of integro-differential equations that occur in various branches of the natural sciences. Abstract We shall present here the motivation and a general description of a method dealing with a class of problems in mathematical physics. The method is, essentially, a statistical approach to the study of differential equations, or more generally, of integro-differential equations that occur in various branches of the natural sciences.},
author = {Metropolis, Nicholas and Ulam, S.},
journal = {Journal of the American Statistical Association},
number = {247},
pages = {335--341},
title = {{The Monte Carlo method}},
volume = {44},
year = {1949}
}
@incollection{wahba1999support,
address = {Cambridge, MA},
author = {Wahba, Grace},
booktitle = {Advances in Kernel Methods: Support Vector Learning},
pages = {69--88},
publisher = {MIT Press},
title = {{Support Vector Machines, Reproducing Kernel Hilbert Spaces, and Randomized GACV}},
year = {1999}
}
@inproceedings{Rao2011,
author = {Rao, N S and Nowak, R D and Wright, S J and Kingsbury, N G},
booktitle = {International Conference on Image Processing (ICIP)},
title = {{Convex approaches to model wavelet sparsity patterns}},
year = {2011}
}
@article{kendall1945treatment,
author = {Kendall, M G},
journal = {Biometrika},
pages = {239--251},
title = {{The treatment of ties in ranking problems}},
volume = {33},
year = {1945}
}
@inproceedings{colt2010,
author = {Golovin, Daniel and Krause, Andreas},
booktitle = {23rd Annual Conference on Learning Theory},
pages = {333--345},
title = {{Adaptive Submodularity: A New Approach to Active Learning
and Stochastic Optimization}},
year = {2010}
}
@article{hastie1996discriminant,
annote = {comps{\_}distance},
author = {Hastie, T and Tibshirani, R},
doi = {10.1109/34.506411},
issn = {0162-8828},
journal = {Pattern Analysis and Machine Intelligence, IEEE Transactions on},
keywords = {adaptive systems,approximation theory,centroid information,curse of dimensionality,global dimension reduction,linear discriminant analysis,local decision boundaries,neighbourhood-based classifier,pattern classification,pattern recognition,posterior probability,probabilityadaptive nearest neighbor classificati},
month = {jun},
number = {6},
pages = {607--616},
title = {{Discriminant adaptive nearest neighbor classification}},
volume = {18},
year = {1996}
}
@inproceedings{Jenatton2010,
author = {Jenatton, R and Obozinski, G and Bach, F},
booktitle = {Proc. AISTATS},
title = {{Structured sparse principal component analysis}},
year = {2009}
}
@article{kapralov_single_2014,
abstract = {We present the first single pass algorithm for computing spectral sparsifiers of graphs in the dynamic semi-streaming model. Given a single pass over a stream containing insertions and deletions of edges to a graph G, our algorithm maintains a randomized linear sketch of the incidence matrix of G into dimension O((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n polylog(n)). Using this sketch, at any point, the algorithm can output a (1 +/- epsilon) spectral sparsifier for G with high probability. While O((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n polylog(n)) space algorithms are known for computing "cut sparsifiers" in dynamic streams [AGM12b, GKP12] and spectral sparsifiers in "insertion-only" streams [KL11], prior to our work, the best known single pass algorithm for maintaining spectral sparsifiers in dynamic streams required sketches of dimension Omega((1/epsilon{\{}$\backslash$textasciicircum{\}}2) n{\{}$\backslash$textasciicircum{\}}(5/3)) [AGM14]. To achieve our result, we show that, using a coarse sparsifier of G and a linear sketch of G's incidence matrix, it is possible to sample edges by effective resistance, obtaining a spectral sparsifier of arbitrary precision. Sampling from the sketch requires a novel application of ell{\_}2/ell{\_}2 sparse recovery, a natural extension of the ell{\_}0 methods used for cut sparsifiers in [AGM12b]. Recent work of [MP12] on row sampling for matrix approximation gives a recursive approach for obtaining the required coarse sparsifiers. Under certain restrictions, our approach also extends to the problem of maintaining a spectral approximation for a general matrix A{\{}$\backslash$textasciicircum{\}}T A given a stream of updates to rows in A.},
annote = {arXiv: 1407.1289},
author = {Kapralov, Michael and Lee, Yin Tat and Musco, Cameron and Musco, Christopher and Sidford, Aaron},
journal = {arXiv:1407.1289 [cs]},
keywords = {Computer Science - Data Structures and Algorithms},
month = {jul},
title = {{Single Pass Spectral Sparsification in Dynamic Streams}},
url = {http://arxiv.org/abs/1407.1289},
year = {2014}
}
@article{domingos1997optimality,
author = {Domingos, Pedro and Pazzani, Michael J},
journal = {Machine Learning},
number = {2-3},
pages = {103--130},
title = {{On the Optimality of the Simple Bayesian Classifier under Zero-One Loss}},
url = {citeseer.ist.psu.edu/article/domingos97optimality.html},
volume = {29},
year = {1997}
}
@inproceedings{Kveton2015,
abstract = {A stochastic combinatorial semi-bandit is an online learning problem where at each step a learning agent chooses a subset of ground items subject to constraints, and then observes stochastic weights of these items and receives their sum as a payoff. In this paper, we close the problem of computationally and sample efficient learning in stochastic combinatorial semi-bandits. In particular, we analyze a UCB-like algorithm for solving the problem, which is known to be computationally efficient; and prove {\$}O(K L (1 / \backslashDelta) \backslashlog n){\$} and {\$}O(\backslashsqrt{\{}K L n \backslashlog n{\}}){\$} upper bounds on its {\$}n{\$}-step regret, where {\$}L{\$} is the number of ground items, {\$}K{\$} is the maximum number of chosen items, and {\$}\backslashDelta{\$} is the gap between the expected returns of the optimal and best suboptimal solutions. The gap-dependent bound is tight up to a constant factor and the gap-free bound is tight up to a polylogarithmic factor.},
archivePrefix = {arXiv},
arxivId = {1410.0949},
author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Szepesvari, Csaba},
booktitle = {Proceedings of AISTATS},
eprint = {1410.0949},
issn = {15337928},
title = {{Tight Regret Bounds for Stochastic Combinatorial Semi-Bandits}},
url = {http://arxiv.org/abs/1410.0949},
volume = {38},
year = {2015}
}
@inproceedings{boyan1995generalization,
author = {Boyan, Justin and Moore, Andrew},
booktitle = {Advances in Neural Information Processing Systems 7},
pages = {369--376},
title = {{Generalization in Reinforcement Learning: Safely Approximating the Value Function}},
year = {1995}
}
@inproceedings{moore1991variable,
author = {Moore, Andrew},
booktitle = {Proceedings of the 8th International Conference on Machine Learning},
title = {{Variable Resolution Dynamic Programming: Efficiently Learning Action Maps in Multivariate Real-Valued State-Spaces}},
year = {1991}
}
@inproceedings{CWY13,
author = {Chen, Wei and Wang, Yajun and Yuan, Yang},
booktitle = {International Conference on Machine Learning},
pages = {151--159},
title = {{Combinatorial Multi-Armed Bandit: General Framework and Applications}},
year = {2013}
}
@article{lo2006evaluation,
abstract = {BACKGROUND: Heparin-induced thrombocytopenia (HIT) is a prothrombotic
adverse drug reaction caused by heparin. As thrombocytopenia is common
in hospitalized patients receiving heparin, it would be useful to
have a clinical scoring system that could differentiate patients
with HIT from those with other reasons for thrombocytopenia. AIM:
To compare prospectively the diagnostic utility of a clinical score
for HIT in two different clinical settings. METHODS: The pretest
clinical scoring system, the '4 T's', was used to classify 100 consecutive
patients referred for possible HIT in one hospital (Hamilton General
Hospital, HGH) into high, intermediate, and low probability groups.
This system was also used to classify likewise 236 patients by clinicians
in Germany referring blood for diagnostic testing for HIT in Greifswald
(GW). The clinical scores were correlated with the results of laboratory
testing for HIT antibodies using the serologic criteria for HIT with
high diagnostic specificity. RESULTS: In both centers, patients with
low scores were unlikely to test positive for HIT antibodies [HGH:
1/64 (1.6{\%}), GW: 0/55 (0{\%})]. Patients with intermediate [HGH: 8/28
(28.6{\%}), GW: 11/139 (7.9{\%})] or high scores [HGH: 8/8 (100{\%}), GW:
9/42 (21.4{\%})] were more likely to test positive for clinically significant
HIT antibodies. The positive predictive value of an intermediate
or high clinical score for clinically significant HIT antibodies
was higher at one center (HGH). CONCLUSIONS: A low pretest clinical
score for HIT seems to be suitable for ruling out HIT in most situations
(high-negative predictive value). The implications of an intermediate
or high score vary in different clinical settings.},
author = {Lo, G K and Juhl, D and Warkentin, T E and Sigouin, C S and Eichler, P and Greinacher, A},
doi = {10.1111/j.1538-7836.2006.01787.x},
institution = {Department of Medicine, McMaster University, Hamilton, ON, Canada.},
journal = {J Thromb Haemost},
keywords = {Adult; Aged; Algorithms; Enzyme-Linked Immunosorbe},
month = {apr},
number = {4},
pages = {759--765},
pmid = {16634744},
title = {{Evaluation of pretest clinical score (4 T's) for the diagnosis of heparin-induced thrombocytopenia in two clinical settings.}},
url = {http://dx.doi.org/10.1111/j.1538-7836.2006.01787.x},
volume = {4},
year = {2006}
}
@inproceedings{Audibert-Bubeck-Munos-2010,
abstract = {We consider the problem of finding the best arm in a stochastic multi-armed bandit game. The regret of a forecaster is here defined by the gap between the mean reward of the optimal arm and the mean reward of the ultimately chosen arm. We propose a highly exploring UCB policy and a new algorithm based on successive rejects. We show that these algorithms are essentially optimal since their regret decreases exponentially at a rate which is, up to a logarithmic factor, the best possible. However, while the UCB policy needs the tuning of a parameter depending on the unobservable hardness of the task, the successive rejects policy benefits from being parameter-free, and also independent of the scaling of the rewards.},
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi},
booktitle = {Conference on Learning Theory},
keywords = {learning,statistics {\&} optimisation,theory {\&} algorithms},
pages = {1--17},
title = {{Best arm identification in multi-armed bandits}},
url = {http://eprints.pascal-network.org/archive/00007409/},
year = {2010}
}
@article{mobius,
author = {Foldes, S and Hammer, P L},
journal = {Mathematics of Operations Research},
number = {2},
pages = {453--461},
title = {{Submodularity, Supermodularity, and Higher-Order Monotonicities of Pseudo-Boolean Functions}},
volume = {30},
year = {2005}
}
@book{munkres1984elements,
author = {Munkres, J R},
publisher = {Addison-Wesley Reading, MA},
title = {{Elements of algebraic topology}},
volume = {2},
year = {1984}
}
@inproceedings{KaSriTe08,
author = {Kakade, S M and Sridharan, K and Tewari, A},
booktitle = {Advances in Neural Information Processing Systems},
pages = {793--800},
title = {{On the complexity of linear prediction: Risk bounds, margin bounds, and regularization}},
year = {2008}
}
@inproceedings{RaSriTe11,
author = {Rakhlin, A and Sridharan, K and Tewari, A},
booktitle = {Advances in Neural Information Processing Systems},
title = {{Online Learning: Stochastic and Constrained Adversaries}},
year = {2011}
}
@incollection{choi2011map,
author = {Choi, Jaedeug and Kim, Kee-Eung},
booktitle = {Advances in Neural Information Processing Systems 24},
editor = {Shawe-Taylor, J and Zemel, R S and Bartlett, P and Pereira, F C N and Weinberger, K Q},
pages = {1989--1997},
title = {{MAP Inference for Bayesian Inverse Reinforcement Learning}},
year = {2011}
}
@article{zhou2003probabilistic,
author = {Zhou, Shaohua and Kruger, Volker and Chellappa, Rama},
journal = {Computer Vision and Image Understanding},
number = {1-2},
pages = {214--245},
title = {{Probabilistic Recognition of Human Faces from Video}},
volume = {91},
year = {2003}
}
@article{hauskrecht2000value-function,
author = {Hauskrecht, Milos},
journal = {Journal of Artificial Intelligence Research},
pages = {33--94},
title = {{Value-Function Approximations for Partially Observable Markov Decision Processes}},
volume = {13},
year = {2000}
}
@inproceedings{hendrickson1995multilevel,
author = {Hendrickson, B and Leland, R},
booktitle = {Proceedings of Supercomputing},
title = {{A multilevel algorithm for partitioning graphs}},
year = {1995}
}
@inproceedings{blelloch2010hierarchical,
author = {Blelloch, Guy E and Koutis, Ioannis and Miller, Gary L and Tangwongsan, Kanat},
booktitle = {High Performance Computing, Networking, Storage and Analysis (SC), 2010 International Conference for},
organization = {IEEE},
pages = {1--12},
title = {{Hierarchical diagonal blocking and precision reduction applied to combinatorial multigrid}},
year = {2010}
}
@article{kveton2006solvinga,
author = {Kveton, Branislav and Hauskrecht, Milos and Guestrin, Carlos},
journal = {Journal of Artificial Intelligence Research},
pages = {153--201},
title = {{Solving Factored MDPs with Hybrid State and Action Variables}},
volume = {27},
year = {2006}
}
@inproceedings{kocak2014efficient,
abstract = {We consider online learning problems under a partial observability model capturing situations where the information conveyed to the learner is between full information and bandit feedback. In the simplest variant, we assume that in addition to its own loss, the learner also gets to observe losses of some other actions. The revealed losses depend on the learner's action and a directed observation system chosen by the environment. For this setting, we propose the first algorithm that enjoys near-optimal regret guarantees without having to know the observation system before selecting its actions. Along similar lines, we also define a new partial information setting that models online combinatorial optimization problems where the feedback received by the learner is between semi-bandit and full feedback. As the predictions of our first algorithm cannot be always computed efficiently in this setting, we propose another algorithm with similar properties and with the benefit of always being computationally efficient, at the price of a slightly more complicated tuning mechanism. Both algorithms rely on a novel exploration strategy called implicit exploration, which is shown to be more efficient both computationally and information-theoretically than previously studied exploration strategies for the problem.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
title = {{Efficient learning by implicit exploration in bandit problems with side observations}},
year = {2014}
}
@inproceedings{matsuzaki05latent,
address = {Morristown, NJ, USA},
author = {Matsuzaki, Takuya and Miyao, Yusuke and Tsujii, Jun'ichi},
booktitle = {ACL '05: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics},
doi = {10.3115/1219840.1219850},
pages = {75--82},
publisher = {Association for Computational Linguistics},
title = {{Probabilistic {\{}CFG{\}} with latent annotations}},
year = {2005}
}
@book{fedorov1972theory,
author = {Fedorov, V V},
publisher = {Academic press},
title = {{Theory of optimal experiments}},
year = {1972}
}
@incollection{he2012imitation,
author = {He, He and Daum{\'{e}} III, Hal and Eisner, Jason},
booktitle = {Advances in Neural Information Processing Systems 25},
editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q},
pages = {3158--3166},
title = {{Imitation Learning by Coaching}},
url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}1449.pdf},
year = {2012}
}
@article{benini1999policy,
author = {Benini, Luca and Bogliolo, Alessandro and Paleologo, Giuseppe and {De Micheli}, Giovanni},
journal = {IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
number = {6},
pages = {813--833},
title = {{Policy Optimization for Dynamic Power Management}},
volume = {18},
year = {1999}
}
@techreport{submodlp,
author = {Obozinski, G and Bach, F},
institution = {HAL},
title = {{Convex relaxation of Combinatorial penalties}},
year = {2011}
}
@inproceedings{Kim2009,
author = {Kim, S and Xing, E P},
booktitle = {Proceedings of the International Conference on Machine Learning (ICML)},
title = {{Tree-Guided Group {\{}L{\}}asso for Multi-Task Regression with Structured Sparsity}},
year = {2010}
}
@article{Hoe63,
author = {Hoeffding, W},
journal = {Journal of the American Statistical Association},
pages = {13--30},
title = {{Probability inequalities for sums of bounded random variables}},
volume = {58},
year = {1963}
}
@book{ahuja1993network,
author = {Ahuja, R K and Magnanti, T L and Orlin, J B},
publisher = {Prentice hall},
title = {{Network flows: theory, algorithms, and applications}},
year = {1993}
}
@inproceedings{badanidiyuru2013bandits,
abstract = {Multi-armed bandit problems are the predominant theoretical model of exploration-exploitation tradeoffs in learning, and they have countless applications ranging from medical trials, to communication networks, to Web search and advertising. In many of these application domains the learner may be constrained by one or more supply (or budget) limits, in addition to the customary limitation on the time horizon. The literature lacks a general model encompassing these sorts of problems. We introduce such a model, called "bandits with knapsacks", that combines aspects of stochastic integer programming with online learning. A distinctive feature of our problem, in comparison to the existing regret-minimization literature, is that the optimal policy for a given latent distribution may significantly outperform the policy that plays the optimal fixed arm. Consequently, achieving sub linear regret in the bandits-with-knapsacks problem is significantly more challenging than in conventional bandit problems. We present two algorithms whose reward is close to the information-theoretic optimum: one is based on a novel "balanced exploration" paradigm, while the other is a primal-dual algorithm that uses multiplicative updates. Further, we prove that the regret achieved by both algorithms is optimal up to polylogarithmic factors. We illustrate the generality of the problem by presenting applications in a number of different domains including electronic commerce, routing, and scheduling. As one example of a concrete application, we consider the problem of dynamic posted pricing with limited supply and obtain the first algorithm whose regret, with respect to the optimal dynamic policy, is sub linear in the supply.},
archivePrefix = {arXiv},
arxivId = {1305.2545},
author = {Badanidiyuru, Ashwinkumar and Kleinberg, Robert and Slivkins, Aleksandrs},
booktitle = {Proceedings - Annual IEEE Symposium on Foundations of Computer Science, FOCS},
doi = {10.1109/FOCS.2013.30},
eprint = {1305.2545},
isbn = {9780769551357},
issn = {02725428},
keywords = {Dynamic ad allocation,Dynamic pricing,Dynamic procurement,Exploratio},
pages = {207--216},
title = {{Bandits with knapsacks}},
year = {2013}
}
@book{duda73pattern,
author = {Duda, Richard and Hart, Peter},
publisher = {Wiley},
title = {{Pattern Classification and Scene Analysis}},
year = {1973}
}
@article{marbach2001simulation-based,
author = {Marbach, Peter and Tsitsiklis, John},
journal = {IEEE Transactions on Automatic Control},
number = {2},
pages = {191--209},
title = {{Simulation-Based Optimization of {\{}Markov{\}} Reward Processes}},
volume = {46},
year = {2001}
}
@article{KV05,
author = {Kalai, A and Vempala, S},
journal = {Journal of Computer and System Sciences},
pages = {291--307},
title = {{Efficient algorithms for online decision problems}},
volume = {71},
year = {2005}
}
@inproceedings{hwang1997predictive,
author = {Hwang, Chi-Hong and Wu, Allen},
booktitle = {Proceedings of the 1997 IEEE / ACM International Conference on Computer-Aided Design},
pages = {28--32},
title = {{A Predictive System Shutdown Method for Energy Saving of Event-Driven Computation}},
year = {1997}
}
@article{sethuraman1994constructive,
annote = {c{\_}omps{\_}models},
author = {Sethuraman, J},
journal = {Statistica Sinica},
pages = {639--650},
title = {{A constructive definition of Dirichlet priors}},
url = {http://www3.stat.sinica.edu.tw/statistica/oldpdf/A4n216.pdf},
volume = {4},
year = {1994}
}
@article{devroye13rwalk_it,
author = {Devroye, L and Lugosi, G and Neu, G},
journal = {Submitted to the IEEE Transactions on Information Theory},
title = {{Prediction by random-walk perturbation}},
year = {2013}
}
@article{bickel_lasso_dantzig,
author = {Bickel, P and Ritov, Y and Tsybakov, A},
journal = {Annals of Statistics},
number = {4},
pages = {1705--1732},
title = {{Simultaneous analysis of {\{}L{\}}asso and {\{}D{\}}antzig selector}},
volume = {37},
year = {2009}
}
@article{WK08,
author = {Warmuth, M and Kuzmin, D},
journal = {Journal of Machine Learning Research},
pages = {2287--2320},
title = {{Randomized Online {\{}PCA{\}} Algorithms with Regret Bounds that are Logarithmic in the Dimension}},
volume = {9},
year = {2008}
}
@inproceedings{ashbrook2008quickdraw:,
author = {Ashbrook, Daniel L and Clawson, James R and Lyons, Kent and Starner, Thad E and Patel, Nirmal},
booktitle = {Proceeding of the twenty-sixth annual SIGCHI conference on Human factors in computing systems},
pages = {219--222},
series = {CHI '08},
title = {{Quickdraw: the impact of mobility and on-body placement on device access time}},
year = {2008}
}
@article{hall1984best,
author = {Hall, Peter and Welsh, Alan H.},
doi = {10.1214/aos/1176346723},
journal = {The Annals of Statistics},
number = {3},
pages = {1079--1084},
publisher = {The Institute of Mathematical Statistics},
title = {{Best Attainable Rates of Convergence for Estimates of Parameters of Regular Variation}},
volume = {12},
year = {1984}
}
@inproceedings{kocak2014wspectral,
abstract = {Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each recommended item is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens nodes evaluations.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Valko, Michal and Munos, R{\'{e}}mi and Kveton, Branislav and Agrawal, Shipra},
booktitle = {AAAI Workshop on Sequential Decision-Making with Big Data},
title = {{Spectral bandits for smooth graph functions with applications in recommender systems}},
year = {2014}
}
@inproceedings{doya95novel,
author = {Doya, K and Sejnowski, T J},
booktitle = {Advances in Neural Information Processing Systems},
pages = {101--108},
title = {{A Novel Reinforcement Model of Birdsong Vocalization Learning}},
url = {citeseer.ist.psu.edu/doya95novel.html},
year = {1995}
}
@incollection{hauskrecht2006fundamentals,
abstract = {Finding reliable, meaningful patterns in data with high numbers of attributes can be extremely difficult. Feature selection helps us to decide what attributes or combination of attributes are most important for finding these patterns. In this chapter, we study feature selection methods for building classification models from high-throughput genomic (microarray) and proteomic (mass spectrometry) data sets. Thousands of feature candidates must be analyzed, compared and combined in such data sets. We describe the basics of four different approaches used for feature selection and illustrate their effects on an MS cancer proteomic data set. The closing discussion provides assistance in performing an analysis in high-dimensional genomic and proteomic data.},
author = {Hauskrecht, Milos and Pelikan, Richard and Valko, Michal and Lyons-Weiler, James},
booktitle = {Fundamentals of Data Mining in Genomics and Proteomics},
keywords = {misovalko},
mendeley-tags = {misovalko},
publisher = {Springer},
title = {{Feature selection and dimensionality reduction in genomics and proteomics}},
year = {2006}
}
@inproceedings{lafferty01conditional,
address = {San Francisco, CA, USA},
annote = {From Duplicate 1 (Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data - Lafferty, John D; McCallum, Andrew; Pereira, Fernando C N)
comps{\_}models},
author = {Lafferty, John D and McCallum, Andrew and Pereira, Fernando C N},
booktitle = {ICML '01: Proceedings of the Eighteenth International Conference on Machine Learning},
isbn = {1-55860-778-1},
pages = {282--289},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Conditional Random Fields: Probabilistic Models for Segmenting and Labeling Sequence Data}},
url = {http://www.cis.upenn.edu/{~}pereira/papers/crf.pdf citeseer.ist.psu.edu/lafferty01conditional.html},
year = {2001}
}
@inproceedings{hillel2007learning,
address = {New York, NY, USA},
annote = {comps{\_}distance},
author = {{Bar Hillel}, Aharon and Weinshall, Daphna},
booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
doi = {10.1145/1273496.1273505},
isbn = {978-1-59593-793-3},
pages = {65--72},
publisher = {ACM},
title = {{Learning distance function by coding similarity}},
year = {2007}
}
@incollection{rakhlin12rr,
author = {Rakhlin, Sasha and Shamir, Ohad and Sridharan, Karthik},
booktitle = {Advances in Neural Information Processing Systems 25},
pages = {2150--2158},
title = {{Relax and Randomize : From Value to Algorithms}},
year = {2012}
}
@article{lecchini2009stochastic,
abstract = {We introduce bounds on the finite-time performance of Markov chain Monte Carlo algorithms in approaching the global solution of stochastic optimization problems over continuous domains. A comparison with other state-of-the-art methods having finite-time guarantees for solving stochastic programming problems is included.},
archivePrefix = {arXiv},
arxivId = {0906.1055},
author = {Lecchini-Visintini, A. and Lygeros, J. and Maciejowski, J.},
eprint = {0906.1055},
month = {jun},
pages = {29},
title = {{Stochastic optimization on continuous domains with finite-time guarantees by Markov chain Monte Carlo methods}},
url = {http://arxiv.org/abs/0906.1055},
year = {2009}
}
@inproceedings{ratliff2007online,
author = {Ratliff, Nathan and Bagnell, Andrew and Zinkevich, Martin},
booktitle = {Proceedings of the 11th International Conference on Artificial Intelligence and Statistics},
title = {{({\{}Online{\}}) Subgradient Methods for Structured Prediction}},
year = {2007}
}
@inproceedings{belkin2004regularization,
author = {Belkin, Mikhail and Matveeva, Irina and Niyogi, Partha},
booktitle = {Conference on Learning Theory},
title = {{Regularization and semi-supervised learning on large graphs}},
year = {2004}
}
@inproceedings{abbeel2004apprenticeship,
author = {Abbeel, Pieter and Ng, Andrew},
booktitle = {Proceedings of the 21st international conference on machine learning},
doi = {10.1145/1015330.1015430},
isbn = {1-58113-838-5},
keywords = {irl},
mendeley-tags = {irl},
title = {{Apprenticeship learning via inverse reinforcement learning}},
url = {http://www.eecs.harvard.edu/{~}parkes/cs286r/spring06/papers/abeelng.pdf},
year = {2004}
}
@inproceedings{feige2006maximizing,
author = {Feige, U},
booktitle = {Proc. ACM symposium on Theory of computing},
pages = {41--50},
title = {{On maximizing welfare when utility functions are subadditive}},
year = {2006}
}
@inproceedings{Steinberger2009,
author = {Steinberger, R and Pouliquen, B and {Van der Goot}, E},
booktitle = {Information Access in a Multilingual World-Proceedings of the SIGIR 2009 Workshop (SIGIR-CLIR'2009)},
pages = {1--8},
title = {{An Introduction to the {\{}Europe Media Monitor{\}} Family of Applications}},
year = {2009}
}
@article{foster11neigh,
author = {Foster, Dean P and Rakhlin, Alexander},
journal = {CoRR},
title = {{No Internal Regret via Neighborhood Watch}},
volume = {abs/1108.6},
year = {2011}
}
@article{bach2008cgl,
author = {Bach, F},
journal = {Journal of Machine Learning Research},
pages = {1179--1225},
publisher = {MIT Press Cambridge, MA, USA},
title = {{Consistency of the group {\{}L{\}}asso and multiple kernel learning}},
volume = {9},
year = {2008}
}
@inproceedings{carpentier11active,
author = {Carpentier, Alexandra and Lazaric, Alessandro and Ghavamzadeh, Mohammad and Munos, R{\'{e}}mi and Auer, Peter},
booktitle = {ALT},
pages = {189--203},
title = {{Upper-Confidence-Bound Algorithms for Active Learning in Multi-armed Bandits.}},
year = {2011}
}
@inproceedings{koutis2010approaching,
author = {Koutis, Ioannis and Miller, Gary L. and Peng, Richard},
booktitle = {2010 IEEE 51st Annual Symposium on Foundations of Computer Science},
doi = {10.1109/FOCS.2010.29},
isbn = {978-1-4244-8525-3},
issn = {0272-5428},
language = {English},
month = {oct},
pages = {235--244},
publisher = {IEEE},
title = {{Approaching Optimality for Solving SDD Linear Systems}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=5671167},
year = {2010}
}
@inproceedings{ferns2005metrics,
author = {Ferns, Norm and Panangaden, Prakash and Precup, Doina},
booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence},
title = {{Metrics for {\{}Markov{\}} Decision Processes with Infinite State Spaces}},
year = {2005}
}
@article{whitney35abstract,
author = {Whitney, Hassler},
journal = {American Journal of Mathematics},
number = {3},
pages = {509--533},
title = {{On the abstract properties of linear dependence}},
volume = {57},
year = {1935}
}
@phdthesis{shalev-shwartz2007online,
author = {Shalev-Shwartz, Shai},
keywords = {bandits},
mendeley-tags = {bandits},
month = {jul},
school = {The Hebrew University of Jerusalem},
title = {{Online Learning: Theory, Algorithms, and Applications}},
year = {2007}
}
@article{lane1999temporal,
address = {New York, NY, USA},
annote = {comps{\_}anX},
author = {Lane, Terran and Brodley, Carla E},
doi = {10.1145/322510.322526},
issn = {1094-9224},
journal = {ACM Trans. Inf. Syst. Secur.},
number = {3},
pages = {295--331},
publisher = {ACM},
title = {{Temporal sequence learning and data reduction for anomaly detection}},
volume = {2},
year = {1999}
}
@inproceedings{alaoui2014fast,
abstract = {One approach to improving the running time of kernel-based machine learning methods is to build a small sketch of the input and use it in lieu of the full kernel matrix in the machine learning task of interest. Here, we describe a version of this approach that comes with running time guarantees as well as improved guarantees on its statistical performance. By extending the notion of $\backslash$emph{\{}statistical leverage scores{\}} to the setting of kernel ridge regression, our main statistical result is to identify an importance sampling distribution that reduces the size of the sketch (i.e., the required number of columns to be sampled) to the $\backslash$emph{\{}effective dimensionality{\}} of the problem. This quantity is often much smaller than previous bounds that depend on the $\backslash$emph{\{}maximal degrees of freedom{\}}. Our main algorithmic result is to present a fast algorithm to compute approximations to these scores. This algorithm runs in time that is linear in the number of samples---more precisely, the running time is {\$}O(np{\^{}}2){\$}, where the parameter {\$}p{\$} depends only on the trace of the kernel matrix and the regularization parameter---and it can be applied to the matrix of feature vectors, without having to form the full kernel matrix. This is obtained via a variant of length-squared sampling that we adapt to the kernel setting in a way that is of independent interest. Lastly, we provide empirical results illustrating our theory, and we discuss how this new notion of the statistical leverage of a data point captures in a fine way the difficulty of the original statistical learning problem.},
author = {Alaoui, Ahmed El and Mahoney, Michael W.},
booktitle = {Neural Information Processing Systems},
title = {{Fast randomized kernel methods with statistical guarantees}},
year = {2015}
}
@article{collins03head-driven,
address = {Cambridge, MA, USA},
author = {Collins, Michael},
doi = {10.1162/089120103322753356},
issn = {0891-2017},
journal = {Computational Linguistics},
number = {4},
pages = {589--637},
publisher = {MIT Press},
title = {{Head-Driven Statistical Models for Natural Language Parsing}},
volume = {29},
year = {2003}
}
@article{Moreau1962,
author = {Moreau, J J},
journal = {C. R. Acad. Sci. Paris S{\'{e}}r. A Math.},
pages = {2897--2899},
title = {{Fonctions convexes duales et points proximaux dans un espace {\{}H{\}}ilbertien}},
volume = {255},
year = {1962}
}
@article{gray1998quantization,
author = {Gray, Robert and Neuhoff, David},
journal = {IEEE Transactions on Information Theory},
number = {6},
pages = {2325--2383},
title = {{Quantization}},
volume = {44},
year = {1998}
}
@inproceedings{Petrov-Klein-2007:AAAI,
annote = {(Nectar Track)},
author = {Petrov, Slav and Klein, Dan},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
pages = {1663--1666},
title = {{Learning and Inference for Hierarchically Split {\{}PCFG{\}}s}},
url = {www.eecs.berkeley.edu/{~}petrov/data/aaai2007.pdf},
year = {2007}
}
@inproceedings{Babaioff09truthful,
author = {Babaioff, Moshe and Sharma, Yogeshwer and Slivkins, Aleksandrs},
booktitle = {ACM-EC},
pages = {79--88},
title = {{Characterizing truthful multi-armed bandit mechanisms}},
year = {2009}
}
@book{SNW11,
editor = {Sra, S and Nowozin, S and Wright, S},
publisher = {MIT Press},
title = {{Optimization for Machine Learning}},
year = {2011}
}
@article{azuma1967weighted,
author = {Azuma, Kazuoki},
journal = {Tohoku Mathematical Journal},
keywords = {bound,math},
number = {3},
pages = {357--367},
title = {{Weighted sums of certain dependent random variables}},
volume = {19},
year = {1967}
}
@book{LT91,
author = {Ledoux, M and Talagrand, M},
publisher = {Springer},
title = {{Probability in Banach Spaces}},
year = {1991}
}
@article{tesauro1992practical,
author = {Tesauro, Gerald},
journal = {Machine Learning},
number = {3-4},
pages = {257--277},
title = {{Practical Issues in Temporal Difference Learning}},
volume = {8},
year = {1992}
}
@inproceedings{tsang2006large-scale,
author = {Tsang, Ivor W and Kwok, James T},
booktitle = {NIPS},
title = {{Large-Scale Sparsified Manifold Regularization.}},
year = {2006}
}
@article{korostelev1999asymptotic,
author = {Korostelev, Alexander and Nussbaum, Michael},
journal = {Bernoulli},
number = {6},
pages = {1099--1118},
title = {{The asymptotic minimax constant for sup-norm loss in nonparametric density estimation}},
volume = {5},
year = {1999}
}
@inproceedings{viola2001robust,
author = {Viola, Paul A and Jones, Michael J},
booktitle = {ICCV},
pages = {747},
title = {{Robust Real-Time Face Detection}},
year = {2001}
}
@article{bubeck2012regret,
abstract = {Multi-armed bandit problems are the most basic examples of sequential decision problems with an exploration-exploitation trade-off. This is the balance between staying with the option that gave highest payoffs in the past and exploring new options that might give higher payoffs in the future. Although the study of bandit problems dates back to the Thirties, exploration-exploitation trade-offs arise in several modern applications, such as ad placement, website optimization, and packet routing. Mathematically, a multi-armed bandit is defined by the payoff process associated with each option. In this survey, we focus on two extreme cases in which the analysis of regret is particularly simple and elegant: i.i.d. payoffs and adversarial payoffs. Besides the basic setting of finitely many actions, we also analyze some of the most important variants and extensions, such as the contextual bandit model.},
archivePrefix = {arXiv},
arxivId = {1204.5721},
author = {Bubeck, S{\'{e}}bastien and Cesa-Bianchi, Nicol{\`{o}}},
eprint = {1204.5721},
journal = {Foundations and Trends in Machine Learning},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1--122},
title = {{Regret Analysis of Stochastic and Nonstochastic Multi-armed Bandit Problems}},
url = {http://arxiv.org/abs/1204.5721},
volume = {5},
year = {2012}
}
@book{grunbaum2003convex,
author = {Gr{\"{u}}nbaum, B},
publisher = {Springer Verlag},
title = {{Convex polytopes}},
volume = {221},
year = {2003}
}
@inproceedings{turian06advances,
address = {Morristown, NJ, USA},
author = {Turian, Joseph and Melamed, I Dan},
booktitle = {ACL '06: Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics},
doi = {10.3115/1220175.1220285},
pages = {873--880},
publisher = {Association for Computational Linguistics},
title = {{Advances in discriminative parsing}},
year = {2006}
}
@article{Zhaoyu,
author = {Zhao, P and Yu, B},
journal = {Journal of Machine Learning Research},
pages = {2541--2563},
title = {{On Model Selection Consistency of {\{}L{\}}asso.}},
volume = {7},
year = {2006}
}
@book{berry1985bandit,
author = {Berry, Donald A and Fristedt, Bert},
booktitle = {Journal of the Operational Research Society},
number = {8},
pages = {viii, 275},
publisher = {Chapman and Hall},
series = {Monographs on statistics and applied probability},
title = {{Bandit Problems: Sequential Allocation of Experiments}},
volume = {38},
year = {1985}
}
@article{zhang1998characterization,
author = {Zhang, Z and Yeung, R W},
journal = {IEEE Transactions on Information Theory},
number = {4},
pages = {1440--1452},
publisher = {IEEE},
title = {{On characterization of entropy function via information inequalities}},
volume = {44},
year = {1998}
}
@misc{sailing,
annote = {http://www.sor.princeton.edu/{\~{}}rvdb/sail/sail.html},
author = {Vanderbei, R},
publisher = {University of Princeton},
title = {{Optimal sailing strategies, statistics and operations research program}},
year = {1996}
}
@article{post2008temporal,
abstract = {Large-scale clinical databases provide a detailed perspective on patient
phenotype in disease and the characteristics of health care processes.
Important information is often contained in the relationships between
the values and timestamps of sequences of clinical data. The analysis
of clinical time sequence data across entire patient populations
may reveal data patterns that enable a more precise understanding
of disease presentation, progression, and response to therapy, and
thus could be of great value for clinical and translational research.
Recent work suggests that the combination of temporal data mining
methods with techniques from artificial intelligence research on
knowledge-based temporal abstraction may enable the mining of clinically
relevant temporal features from these previously problematic general
clinical data.},
author = {Post, Andrew R and Harrison, James H},
doi = {10.1016/j.cll.2007.10.005},
institution = {Division of Clinical Informatics, Department of Public Health Sciences, University of Virginia, Suite 3181 West Complex, 1335 Hospital Drive, Charlottesville, VA 22908-0717, USA. arp4m@virginia.edu},
journal = {Clin Lab Med},
keywords = {Algorithms; Artificial Intelligence; Databases as,Automated; Software; Time Factors,methods; Pattern Recognition},
month = {mar},
number = {1},
pages = {83--100, vii},
pmid = {18194720},
title = {{Temporal data mining.}},
url = {http://dx.doi.org/10.1016/j.cll.2007.10.005},
volume = {28},
year = {2008}
}
@article{helmbold2000adaptive,
author = {Helmbold, David and Long, Darrell and Sconyers, Tracey and Sherrod, Bruce},
journal = {Mobile Networks and Applications},
number = {4},
pages = {285--297},
title = {{Adaptive Disk Spin-Down for Mobile Computers}},
volume = {5},
year = {2000}
}
@techreport{cohen2015ridge,
abstract = {Often used as importance sampling probabilities, leverage scores have become indispensable in randomized algorithms for linear algebra, optimization, graph theory, and machine learning. A major body of work seeks to adapt these scores to low-rank approximation problems. However, existing "low-rank leverage scores" can be difficult to compute, often work for just a single application, and are sensitive to matrix perturbations. We show how to avoid these issues by exploiting connections between low-rank approximation and regularization. Specifically, we employ ridge leverage scores, which are simply standard leverage scores computed with respect to an {\$}\backslashell{\_}2{\$} regularized input. Importance sampling by these scores gives the first unified solution to two of the most important low-rank sampling problems: {\$}(1+\backslashepsilon){\$} error column subset selection and {\$}(1+\backslashepsilon){\$} error projection-cost preservation. Moreover, ridge leverage scores satisfy a key monotonicity property that does not hold for any prior low-rank leverage scores. Their resulting robustness leads to two sought-after results in randomized linear algebra. 1) We give the first input-sparsity time low-rank approximation algorithm based on iterative column sampling, resolving an open question posed in [LMP13], [CLM+15], and [AM15]. 2) We give the first single-pass streaming column subset selection algorithm whose real-number space complexity has no dependence on stream length.},
archivePrefix = {arXiv},
arxivId = {1511.07263},
author = {Cohen, Michael B. and Musco, Cameron and Musco, Christopher},
eprint = {1511.07263},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Cohen, Musco, Musco - 2015 - Ridge Leverage Scores for Low-Rank Approximation.pdf:pdf},
month = {nov},
title = {{Ridge leverage scores for low-rank approximation}},
url = {http://arxiv.org/abs/1511.07263},
year = {2015}
}
@inproceedings{cooper1988method,
author = {Cooper, Gregory},
booktitle = {Proceedings of the Workshop on Uncertainty in Artificial Intelligence},
pages = {55--63},
title = {{A Method for Using Belief Networks as Influence Diagrams}},
year = {1988}
}
@inproceedings{delalleau2005efficient,
author = {Delalleau, Olivier and Bengio, Yoshua and {Le Roux}, Nicolas},
booktitle = {AISTATS},
pages = {96--103},
title = {{Efficient Non-Parametric Function Induction in Semi-Supervised Learning}},
year = {2005}
}
@techreport{nene1996columbia,
author = {Nene, S A and Nayar, S K and Murase, H},
institution = {Department of Computer Science, Columbia University},
number = {CUCS-005-96},
month = {feb},
title = {{{\{}C{\}}olumbia {\{}O{\}}bject {\{}I{\}}mage {\{}L{\}}ibrary ({\{}C{\}}{\{}O{\}}{\{}I{\}}{\{}L{\}}-100)}},
year = {1996}
}
@misc{graclus,
author = {Graclus},
publisher = {University of Texas},
title = {{Graclus}},
url = {www.cs.utexas.edu/users/dml/Software/graclus.html},
year = {2013}
}
@article{ghahramani2000variational,
address = {Cambridge, MA, USA},
annote = {c{\_}omps{\_}models},
author = {Ghahramani, Zoubin and Hinton, Geoffrey E},
doi = {10.1162/089976600300015619},
issn = {0899-7667},
journal = {Neural Comput.},
number = {4},
pages = {831--864},
publisher = {MIT Press},
title = {{Variational Learning for Switching State-Space Models}},
volume = {12},
year = {2000}
}
@article{gelly2012grand,
author = {Gelly, Sylvain and Kocsis, Levente and Schoenauer, Marc and Sebag, Mich{\`{e}}le and Silver, David and Szepesv{\'{a}}ri, Csaba and Teytaud, Olivier},
journal = {Communications of the ACM},
month = {mar},
number = {3},
pages = {106--113},
publisher = {ACM},
title = {{The grand challenge of computer Go: Monte Carlo tree search and extensions}},
volume = {55},
year = {2012}
}
@incollection{BL13,
author = {Bubeck, Sebastien and Liu, Che-yu},
booktitle = {Advances in Neural Information Processing Systems 26},
editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q},
pages = {638--646},
title = {{Prior-free and prior-dependent regret bounds for Thompson Sampling}},
year = {2013}
}
@inproceedings{kawahara22submodularity,
author = {Kawahara, Y and Nagano, K and Tsuda, K and Bilmes, J A},
booktitle = {Advances in Neural Information Processing Systems 22},
title = {{Submodularity Cuts and Applications}},
year = {2009}
}
@book{CSV09,
author = {Conn, A and Scheinberg, K and Vicente, L},
publisher = {Society for Industrial and Applied Mathematics (SIAM)},
title = {{Introduction to Derivative-Free Optimization}},
year = {2009}
}
@inproceedings{hauskrecht2006approximate,
author = {Hauskrecht, Milos and Kveton, Branislav},
booktitle = {Proceedings of the 9th International Symposium on Artificial Intelligence and Mathematics},
pages = {114--120},
title = {{Approximate Linear Programming for Solving Hybrid Factored {\{}MDPs{\}}}},
year = {2006}
}
@book{filar1996competitive,
address = {New York, NY},
author = {Filar, Jerzy and Vrieze, Koos},
isbn = {1461284813 9781461284819},
publisher = {Springer New York},
title = {{Competitive Markov decision processes}},
year = {2012}
}
@book{Spa03,
author = {Spall, J},
publisher = {Wiley Interscience},
title = {{Introduction to stochastic search and optimization. Estimation, simulation, and control}},
year = {2003}
}
@article{bates2003ten,
abstract = {While evidence-based medicine has increasingly broad-based support
in health care, it remains difficult to get physicians to actually
practice it. Across most domains in medicine, practice has lagged
behind knowledge by at least several years. The authors believe that
the key tools for closing this gap will be information systems that
provide decision support to users at the time they make decisions,
which should result in improved quality of care. Furthermore, providers
make many errors, and clinical decision support can be useful for
finding and preventing such errors. Over the last eight years the
authors have implemented and studied the impact of decision support
across a broad array of domains and have found a number of common
elements important to success. The goal of this report is to discuss
these lessons learned in the interest of informing the efforts of
others working to make the practice of evidence-based medicine a
reality.},
author = {Bates, David W and Kuperman, Gilad J and Wang, Samuel and Gandhi, Tejal and Kittler, Anne and Volk, Lynn and Spurr, Cynthia and Khorasani, Ramin and Tanasijevic, Milenko and Middleton, Blackford},
doi = {10.1197/jamia.M1370},
institution = {Department of Medicine, Brigham and Women's Hospital, Boston, MA 02115, USA. dbates@partners.org},
journal = {J Am Med Inform Assoc},
keywords = {Clinical,Computer-Assisted,Decision Making,Decision Support Systems,Decision Support Techniques,Evidence-B},
number = {6},
pages = {523--530},
pmid = {12925543},
title = {{Ten commandments for effective clinical decision support: making the practice of evidence-based medicine a reality.}},
url = {http://dx.doi.org/10.1197/jamia.M1370},
volume = {10},
year = {2003}
}
@inproceedings{pinto2009how,
author = {Pinto, Nicolas and DiCarlo, James and Cox, David},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
title = {{How Far Can You Get with a Modern Face Recognition Test Set Using Only Simple Features?}},
year = {2009}
}
@article{smola2004tutorial,
author = {Smola, Alex and Sch{\"{o}}lkopf, Bernhard},
journal = {Statistics and Computing},
number = {3},
pages = {199--222},
title = {{A Tutorial on Support Vector Regression}},
volume = {14},
year = {2004}
}
@inproceedings{shelton2001policy,
author = {Shelton, Christian},
booktitle = {Proceedings of the 17th Conference on Uncertainty in Artificial Intelligence},
pages = {496--503},
title = {{Policy Improvement for {POMDPs} Using Normalized Importance Sampling}},
year = {2001}
}
@inproceedings{guillou2015collaborative,
abstract = {Recommender Systems (RS) aim at suggesting to users one or several items in which they might have interest. Following the feedback they receive from the user, these systems have to adapt their model in order to improve future recommendations. The repetition of these steps defines the RS as a sequential process. This sequential aspect raises an exploration-exploitation dilemma, which is surprisingly rarely taken into account for RS without contextual information. In this paper we present an explore-exploit collaborative filtering RS, based on Matrix Factor-ization and Bandits algorithms. Using experiments on artificial and real datasets, we show the importance and practicability of using sequential approaches to perform recommendation. We also study the impact of the model update on both the quality and the computation time of the recommendation procedure.},
author = {Guillou, Fr{\'{e}}d{\'{e}}ric and Gaudel, Romaric and Preux, Philippe},
booktitle = {NIPS Workshop on Machine Learning for eCommerce},
keywords = {Collaborative Filtering,Matrix Factorization,Multi-Armed Bandtis,Recommender Systems,sequential Recommender Systems},
month = dec,
title = {{Collaborative filtering as a multi-armed bandit}},
year = {2015}
}
@article{erdos1960evolution,
author = {Erd{\H{o}}s, Paul and R{\'{e}}nyi, Alfr{\'{e}}d},
journal = {Publ. Mathematical Institute of the Hungarian Academy of Sciences},
title = {{On the evolution of random graphs}},
url = {http://www.citeulike.org/group/3072/article/1666220},
year = {1960}
}
@misc{movielens,
author = {Lam, Shyong and Herlocker, Jon},
url = {http://www.grouplens.org/node/12},
title = {{MovieLens 1M Dataset}},
year = {2012}
}
@book{kearfott1996rigorous,
author = {Kearfott, R Baker},
isbn = {9780792342380},
publisher = {Springer},
series = {Nonconvex Optimization and Its Applications},
title = {{Rigorous Global Search: Continuous Problems}},
url = {http://books.google.fr/books?id=GBVnnsN5yCYC},
year = {1996}
}
@book{Bertsekas,
author = {Bertsekas, D},
publisher = {Athena Scientific},
title = {{Nonlinear programming}},
year = {1995}
}
@techreport{jaggi,
author = {Jaggi, M},
institution = {Arxiv},
number = {1108.1170},
title = {{Convex Optimization without Projection Steps}},
year = {2011}
}
@article{aha1991instance-based,
address = {Hingham, MA, USA},
author = {Aha, David W and Kibler, Dennis and Albert, Marc K},
doi = {10.1023/A:1022689900470},
issn = {0885-6125},
journal = {Mach. Learn.},
number = {1},
pages = {37--66},
publisher = {Kluwer Academic Publishers},
title = {{Instance-Based Learning Algorithms}},
volume = {6},
year = {1991}
}
@inproceedings{carpentier2014extreme,
abstract = {In many areas of medicine, security, and life sciences, we want to allocate limited resources to different sources in order to detect extreme values. In this paper, we study an efficient way to allocate these resources sequentially under limited feedback. While sequential design of experiments is well studied in bandit theory, the most commonly optimized property is the regret with respect to the maximum mean reward. However, in other problems such as network intrusion detection, we are interested in detecting the most extreme value output by the sources. Therefore, in our work we study extreme regret which measures the efficiency of an algorithm compared to the oracle policy selecting the source with the heaviest tail. We propose the ExtremeHunter algorithm, provide its analysis, and evaluate it empirically on synthetic and real-world experiments.},
author = {Carpentier, Alexandra and Valko, Michal},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2014 - Extreme bandits.pdf:pdf},
title = {{Extreme bandits}},
year = {2014}
}
@inproceedings{rudi2015less,
abstract = {We study Nystr{\"o}m type subsampling approaches to large scale kernel methods, and prove learning bounds in the statistical learning setting, where random sampling and high probability estimates are considered. In particular, we prove that these approaches can achieve optimal learning bounds, provided the subsampling level is suitably chosen. These results suggest a simple incremental variant of Nystr{\"o}m Kernel Regularized Least Squares, where the subsampling level implements a form of computational regularization, in the sense that it controls at the same time regularization and computations. Extensive experimental analysis shows that the considered approach achieves state of the art performances on benchmark large scale datasets.},
author = {Rudi, Alessandro and Camoriano, Raffaello and Rosasco, Lorenzo},
booktitle = {Neural Information Processing Systems},
title = {{Less is more: Nystr{\"{o}}m computational regularization}},
year = {2015}
}
@article{chow1991optimal,
author = {Chow, Chee-Seng and Tsitsiklis, John},
journal = {IEEE Transactions on Automatic Control},
number = {8},
pages = {898--914},
title = {{An Optimal One-Way Multigrid Algorithm for Discrete-Time Stochastic Control}},
volume = {36},
year = {1991}
}
@inproceedings{sanner2006practical,
author = {Sanner, Scott and Boutilier, Craig},
booktitle = {Proceedings of the 22nd Conference on Uncertainty in Artificial Intelligence},
title = {{Practical Linear Value-Approximation Techniques for First-Order {MDPs}}},
year = {2006}
}
@article{bala2001conformism,
abstract = {When there are competing technologies or products with unknown payoffs an important question is which technology will prevail and whether technologies with different payoffs can coexist in the long run. In this paper, we use a social learning model with local interactions to study this question. We show that the adoption of technologies as well as the prospects of conformism/diversity depend crucially on the nature of interaction between individuals and the heterogeneity of preferences in a society.},
author = {Bala, Venkatesh and Goyal, Sanjeev},
journal = {Economic Theory},
pages = {101--120},
title = {{Conformism and diversity under social learning}},
volume = {17},
year = {2001}
}
@article{buntine1996guide,
address = {Thinkbank, 1678 Shattuck Ave, Suite 320, Berkeley, Ca, 94709},
author = {Buntine, W},
journal = {IEEE Transactions on Knowledge and Data Engineering},
pages = {195--210},
title = {{A Guide to the Literature on Learning Probabilistic Networks from Data}},
url = {http://citeseer.nj.nec.com/buntine96guide.html},
volume = {8},
year = {1996}
}
@techreport{gelly2006modifications,
author = {Gelly, Sylvain and Yizao, Wang and Munos, R{\'{e}}mi and Teytaud, Olivier},
institution = {Inria},
title = {{Modification of UCT with patterns in Monte-Carlo Go}},
url = {https://hal.inria.fr/inria-00117266},
year = {2006}
}
@article{gyorfi2007sequential,
author = {Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Ottucs{\'{a}}k, Gy{\"{o}}rgy},
journal = {IEEE Transactions on Information Theory},
number = {5},
pages = {1866--1872},
title = {{Sequential prediction of unbounded stationary time series}},
volume = {53},
year = {2007}
}
@inproceedings{ST10,
author = {Sridharan, K and Tewari, A},
booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)},
title = {{Convex Games in Banach Spaces}},
year = {2010}
}
@inproceedings{liu2011dynamic,
abstract = {We consider a large-scale cyber network with N components (e.g., paths, servers, subnets). Each component is either in a healthy state (0) or an abnormal state (1). Due to random intrusions, the state of each component transits from 0 to 1 over time according to certain stochastic process. At each time, a subset of K (K {\textless} N) components are checked and those observed in abnormal states are fixed. The objective is to design the optimal scheduling for intrusion detection such that the long-term network cost incurred by all abnormal components is minimized. We formulate the problem as a special class of Restless Multi-Armed Bandit (RMAB) process. A general RMAB suffers from the curse of dimensionality (PSPACE-hard) and numerical methods are often inapplicable. We show that, for this class of RMAB, Whittle index exists and can be obtained in closed form, leading to a low-complexity implementation of Whittle index policy with a strong performance. For homogeneous components, Whittle index policy is shown to have a simple structure that does not require any prior knowledge on the intrusion processes. Based on this structure, Whittle index policy is further shown to be optimal over a finite time horizon with an arbitrary length. Beyond intrusion detection, these results also find applications in queuing networks with finite-size buffers.},
archivePrefix = {arXiv},
arxivId = {1112.0101},
author = {Liu, Keqin and Zhao, Qing},
booktitle = {IEEE International Symposium on Information Theory Proceedings},
eprint = {1112.0101},
title = {{Dynamic Intrusion Detection in Resource-Constrained Cyber Networks}},
url = {http://arxiv.org/abs/1112.0101},
year = {2012}
}
@article{narayanan1995rounding,
author = {Narayanan, H},
journal = {Linear algebra and its applications},
pages = {41--57},
publisher = {Elsevier},
title = {{A rounding technique for the polymatroid membership problem}},
volume = {221},
year = {1995}
}
@inproceedings{russell1998learning,
abstract = {This talk proposes a very simple 'baseline architecture' for a learning agent that can handle stochastic, partially observable environments. The architecture uses reinforcement learning together with a method for representing temporal processes as graphical models. I will discuss methods for learning the parameters and structure of such representations from sensory inputs, and for computing posterior probabilities. Some open problems remain before we can try out the complete agent; more arise when we consider scaling up. A second theme of the talk will be whether reinforcement learning can provide a good model of animal and human learning. To answer this question, we must do inverse reinforcement learning: given the observed behaviour, what reward signal, if any, is being optimized? This seems to be a very interesting problem for the COLT, UAI, and ML communities, and has been addressed in econometrics under the heading of structural estimation of Markov decision processes.},
author = {Russell, Stuart},
booktitle = {Proceedings of the 11th Annual Conference on Computational Learning Theory (COLT)},
doi = {10.1145/279943.279964},
isbn = {1581130570},
keywords = {Decision theory,Graphic methods,Inverse problems,Inverse reinforcement learning,Learning agents,Learning systems,Markov processes,Mathematical models,Optimization,Probability,Problem solving,Reinforcement learning},
pages = {101--103},
title = {{Learning agents for uncertain environments (extended abstract)}},
url = {http://www.scopus.com/inward/record.url?eid=2-s2.0-0031640746{\&}partnerID=40},
year = {1998}
}
@article{wainwright2008graphical,
author = {Wainwright, M J and Jordan, M I},
journal = {Foundations and Trends{\textregistered} in Machine Learning},
number = {1-2},
pages = {1--305},
publisher = {Now Publishers Inc.},
title = {{Graphical models, exponential families, and variational inference}},
volume = {1},
year = {2008}
}
@book{Nocedal:1999:NO1,
author = {Nocedal, J and Wright, S J},
edition = {2nd},
publisher = {Springer},
title = {{Numerical Optimization}},
year = {2006}
}
@article{baxter2001infinite-horizon,
author = {Baxter, Jonathan and Bartlett, Peter and Weaver, Lex},
journal = {Journal of Artificial Intelligence Research},
pages = {319--350},
title = {{Infinite-Horizon Policy-Gradient Estimation}},
volume = {15},
year = {2001}
}
@article{higdon1998auxiliary,
author = {Higdon, David},
journal = {Journal of the American Statistical Association},
number = {442},
pages = {585--595},
title = {{Auxiliary Variable Methods for {Markov} Chain {Monte Carlo} with Applications}},
volume = {93},
year = {1998}
}
@article{freund1997decision,
author = {Freund, Y and Schapire, R E},
journal = {Journal of Computer and System Sciences},
pages = {119--139},
title = {{A decision-theoretic generalization of on-line learning and an application to boosting}},
volume = {55},
year = {1997}
}
@phdthesis{sondik1971optimal,
author = {Sondik, Edward},
school = {Stanford University},
title = {{The Optimal Control of Partially Observable {Markov} Decision Processes}},
year = {1971}
}
@article{lagoudakis2003least-squares,
author = {Lagoudakis, Michail and Parr, Ronald},
journal = {Journal of Machine Learning Research},
pages = {1107--1149},
title = {{Least-Squares Policy Iteration}},
volume = {4},
year = {2003}
}
@inproceedings{munos1999variable,
author = {Munos, Remi and Moore, Andrew},
booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence},
pages = {1348--1355},
title = {{Variable Resolution Discretization for High-Accuracy Solutions of Optimal Control Problems}},
year = {1999}
}
@inproceedings{grill2016blazing,
abstract = {You are a robot and you live in a Markov decision process (MDP) with a finite or an infinite number of transitions from state-action to next states. You got brains and so you plan before you act. Luckily, your roboparents equipped you with a generative model to do some Monte-Carlo planning. The world is waiting for you and you have no time to waste. You want your planning to be efficient. Sample-efficient. Indeed, you want to exploit the possible structure of the MDP by exploring only a subset of states reachable by following near-optimal policies. You want guarantees on sample complexity that depend on a measure of the quantity of near-optimal states. You want something, that is an extension of Monte-Carlo sampling (for estimating an expectation) to problems that alternate maximization (over actions) and expectation (over next states). But you do not want to StOP with exponential running time, you want something simple to implement and computationally efficient. You want it all and you want it now. You want TrailBlazer.},
author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Grill, Valko, Munos - 2016 - Blazing the trails before beating the path Sample-efficient Monte-Carlo planning.pdf:pdf},
title = {{Blazing the trails before beating the path: Sample-efficient Monte-Carlo planning}},
year = {2016}
}
@misc{baxter99direct,
author = {Baxter, J and Bartlett, P},
title = {{Direct Gradient-Based Reinforcement Learning}},
url = {http://citeseer.ist.psu.edu/baxter99direct.html},
year = {1999}
}
@article{BMSS11,
author = {Bubeck, S and Munos, R and Stoltz, G and Szepesvari, C},
journal = {Journal of Machine Learning Research},
pages = {1587--1627},
title = {{$\mathcal{X}$-Armed Bandits}},
volume = {12},
year = {2011}
}
@article{arora2012deterministic,
author = {Arora, Raman and Dekel, Ofer and Tewari, Ambuj},
journal = {CoRR},
title = {{Deterministic {MDPs} with Adversarial Rewards and Bandit Feedback}},
volume = {abs/1210.4843},
year = {2012}
}
@inproceedings{lussdecomposing,
author = {Luss, R and Rosset, S and Shahar, M},
booktitle = {Adv. NIPS},
title = {{Decomposing Isotonic Regression for Efficiently Solving Large Problems}},
volume = {23},
year = {2010}
}
@inproceedings{dams13wireless,
author = {Dams, Johannes and Hoefer, Martin and Kesselheim, Thomas},
booktitle = {DISC},
editor = {Afek, Yehuda},
pages = {344--357},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Sleeping Experts in Wireless Networks.}},
volume = {8205},
year = {2013}
}
@inproceedings{boularias2011relative,
abstract = {We consider the problem of imitation learning where the examples, demonstrated by an expert, cover only a small part of a large state space. Inverse Reinforcement Learning (IRL) provides an efficient tool for generalizing the demonstration, based on the assumption that the expert is optimally acting in a Markov Decision Process (MDP). Most of the past work on IRL requires that a (near)-optimal policy can be computed for different reward functions. However, this requirement can hardly be satisfied in systems with a large, or continuous, state space. In this paper, we propose a model-free IRL algorithm, where the relative entropy between the empirical distribution of the state-action trajectories under a uniform policy and their distribution under the learned policy is minimized by stochastic gradient descent. We compare this new approach to well-known IRL algorithms using approximate MDP models. Empirical results on simulated car racing, grid-world and ball-in-a-cup problems show that our approach is able to learn good policies from a small number of demonstrations.},
author = {Boularias, Abdeslam and Kober, Jens and Peters, Jan},
booktitle = {Proceedings of the 14th International Conference on Artificial Intelligence and Statistics},
pages = {182--189},
title = {{Relative Entropy Inverse Reinforcement Learning}},
volume = {15},
year = {2011}
}
@article{nemhauser1978analysis,
author = {Nemhauser, G L and Wolsey, L A and Fisher, M L},
journal = {Mathematical Programming},
number = {1},
pages = {265--294},
publisher = {Springer},
title = {{An analysis of approximations for maximizing submodular set functions--I}},
volume = {14},
year = {1978}
}
@article{baxter2001experiments,
author = {Baxter, Jonathan and Bartlett, Peter and Weaver, Lex},
journal = {Journal of Artificial Intelligence Research},
pages = {351--381},
title = {{Experiments with Infinite-Horizon, Policy-Gradient Estimation}},
volume = {15},
year = {2001}
}
@inproceedings{shalev-shwartz2004online,
address = {New York, NY, USA},
annote = {comps{\_}distance},
author = {Shalev-Shwartz, Shai and Singer, Yoram and Ng, Andrew Y},
booktitle = {ICML '04: Proceedings of the twenty-first international conference on Machine learning},
doi = {http://doi.acm.org/10.1145/1015330.1015376},
isbn = {1-58113-828-5},
pages = {94},
publisher = {ACM},
title = {{Online and batch learning of pseudo-metrics}},
year = {2004}
}
@inproceedings{kveton2010online,
abstract = {This paper proposes an algorithm for real-time learning without explicit
feedback. The algorithm combines the ideas of semi-supervised learning
on graphs and online learning. In particular, it iteratively builds
a graphical representation of its world and updates it with observed
examples. Labeled examples constitute the initial bias of the algorithm
and are provided offline, and a stream of unlabeled examples is collected
online to update this bias. We motivate the algorithm, discuss how
to implement it efficiently, prove a regret bound on the quality
of its solutions, and apply it to the problem of real-time face recognition.
Our recognizer runs in real time, and achieves superior precision
and recall on 3 challenging video datasets.},
address = {San Francisco, CA},
author = {Kveton, Branislav and Valko, Michal and Phillipose, Matthai and Huang, Ling},
booktitle = {The Fourth IEEE Online Learning for Computer Vision Workshop in The Twenty--Third IEEE Conference on Computer Vision and Pattern Recognition},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{Online Semi-Supervised Perception: Real-Time Learning without Explicit Feedback}},
year = {2010}
}
@article{ellison1993rules,
abstract = {This paper studies agents who consider the experiences of their neighbors in deciding which of two technologies to use. We analyze two learning environments, one in which the same technology is optimal for all players and another in which each technology is better for some of them. In both environments, players use exogenously specified rules of thumb that ignore historical data but may incorporate a tendency to use the more popular technology. In some cases these naive rules can lead to fairly efficient decisions in the long run, but adjustment can be slow when a superior technology is first introduced.},
author = {Ellison, Glenn and Fudenberg, Drew},
journal = {Journal of Political Economy},
number = {4},
pages = {612--643},
title = {{Rules of thumb for social learning}},
volume = {101},
year = {1993}
}
@inproceedings{silander2006simple,
address = {Arlington, Virginia},
author = {Silander, Tomi and Myllymaki, Petri},
booktitle = {Proceedings of the 22nd Annual Conference on Uncertainty in Artificial Intelligence (UAI-06)},
publisher = {AUAI Press},
title = {{A simple approach for finding the globally optimal Bayesian network structure}},
year = {2006}
}
@techreport{contal2016stochastic,
abstract = {The paper considers the problem of global optimization in the setup of stochastic process bandits. We introduce an UCB algorithm which builds a cascade of discretization trees based on generic chaining in order to render possible his operability over a continuous domain. The theoretical framework applies to functions under weak probabilistic smoothness assumptions and also extends significantly the spectrum of application of UCB strategies. Moreover generic regret bounds are derived which are then specialized to Gaussian processes indexed on infinite-dimensional spaces as well as to quadratic forms of Gaussian processes. Lower bounds are also proved in the case of Gaussian processes to assess the optimality of the proposed algorithm.},
archivePrefix = {arXiv},
arxivId = {1602.04976},
author = {Contal, Emile and Vayatis, Nicolas},
eprint = {1602.04976},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Contal, Vayatis - 2016 - Stochastic process bandits Upper confidence bounds algorithms via generic chaining.pdf:pdf},
month = feb,
title = {{Stochastic process bandits: Upper confidence bounds algorithms via generic chaining}},
year = {2016}
}
@article{neal2000markov,
abstract = {This article reviews Markov chain methods for sampling from the posterior
distribution of a Dirichlet process mixture model and presents two
new classes of methods. One new approach is to make Metropolis-Hastings
updates of the indicators specifying which mixture component is associated
with each observation, perhaps supplemented with a partial form of
Gibbs sampling. The other new approach extends Gibbs sampling for
these indicators by using a set of auxiliary parameters. These methods
are simple to implement and are more efficient than previous ways
of handling general Dirichlet process mixture models with non-conjugate
priors.},
annote = {c{\_}omps{\_}models},
author = {Neal, Radford M},
journal = {Journal of Computational and Graphical Statistics},
keywords = {dirichlet,dp,gibbs-sampling,mcmc},
number = {2},
pages = {249--265},
title = {{Markov Chain Sampling Methods for Dirichlet Process Mixture Models}},
url = {http://www.jstor.org/stable/1390653},
volume = {9},
year = {2000}
}
@article{munos2002variable,
author = {Munos, Remi and Moore, Andrew},
journal = {Machine Learning},
pages = {291--323},
title = {{Variable Resolution Discretization in Optimal Control}},
volume = {49},
year = {2002}
}
@inproceedings{aktolga2010detecting,
author = {Aktolga, Elif and Ros, Irene and Assogba, Yannick},
booktitle = {Proceedings of SIGIR},
title = {{Detecting Outlier Sections in US Congressional Legislation}},
type = {IR},
year = {2010}
}
@inproceedings{boutilier1996approximating,
author = {Boutilier, Craig and Dearden, Richard},
booktitle = {Proceedings of the 13th International Conference on Machine Learning},
pages = {54--62},
title = {{Approximating Value Trees in Structured Dynamic Programming}},
year = {1996}
}
@article{Auer2010,
abstract = {In the stochastic multi-armed bandit problem we consider a modification of the UCB algorithm of Auer et al. 4. For this modified algorithm we give an improved bound on the regret with respect to the optimal reward. While for the original UCB algorithm the regret in K-armed bandits after T trials is bounded by const K log(T)/Delta, where Delta measures the distance between a suboptimal arm and the optimal arm, for the modified UCB algorithm we show an upper bound on the regret of const K log (T/Delta 2) Delta.},
author = {Auer, Peter and Ortner, Ronald},
doi = {10.1007/s10998-010-3055-6},
journal = {Periodica Mathematica Hungarica},
keywords = {computational,information theoretic learning with statistics,learning,statistics {\&} optimisation,theory {\&} algorithms},
pages = {1--11},
title = {{UCB revisited: Improved regret bounds for the stochastic multi-armed bandit problem}},
volume = {61},
year = {2010}
}
@phdthesis{collins99thesis,
author = {Collins, Michael},
school = {University of Pennsylvania},
title = {{Head-Driven Statistical Models for Natural Language Processing}},
year = {1999}
}
@article{hochbaum2001efficient,
author = {Hochbaum, D S},
journal = {Journal of the ACM},
number = {4},
pages = {686--701},
publisher = {ACM},
title = {{An efficient algorithm for image segmentation, Markov random fields and related problems}},
volume = {48},
year = {2001}
}
@book{rockafellar_subgrad,
author = {Rockafellar, R T},
publisher = {Heldermann Verlag, Berlin, D},
title = {{The theory of subgradients and its applications to problems of optimization: {Convex} and nonconvex functions}},
year = {1981}
}
@article{agrawal2012thompsonarxiv,
abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state-of-the-art methods. However, many questions regarding its theoretical performance remained open. In this paper, we design and analyze a generalization of Thompson Sampling algorithm for the stochastic contextual multi-armed bandit problem with linear payoff functions, when the contexts are provided by an adaptive adversary. This is among the most important and widely studied versions of the contextual bandits problem. We provide the first theoretical guarantees for the contextual version of Thompson Sampling. We prove a high probability regret bound of $\tilde{O}(d^{3/2}\sqrt{T})$ (or $\tilde{O}(d\sqrt{T \log(N)})$), which is the best regret bound achieved by any computationally efficient algorithm available for this problem in the current literature, and is within a factor of $\sqrt{d}$ (or $\sqrt{\log(N)}$) of the information-theoretic lower bound for this problem.},
archivePrefix = {arXiv},
arxivId = {1209.3352},
author = {Agrawal, Shipra and Goyal, Navin},
eprint = {1209.3352},
journal = {CoRR},
month = sep,
title = {{Thompson Sampling for Contextual Bandits with Linear Payoffs}},
url = {http://arxiv.org/abs/1209.3352},
year = {2012}
}
@inproceedings{jensen1994from,
author = {Jensen, Frank and Jensen, Finn and Dittmer, S{\o}ren},
booktitle = {Proceedings of the 10th Conference on Uncertainty in Artificial Intelligence},
pages = {367--373},
title = {{From Influence Diagrams to Junction Trees}},
year = {1994}
}
@article{shi2000normalized,
abstract = {We propose a novel approach for solving the perceptual grouping problem in vision. Rather than focusing on local features and their consistencies in the image data, our approach aims at extracting the global impression of an image. We treat image segmentation as a graph},
author = {Shi, J and Malik, J},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
pages = {888--905},
title = {{Normalized Cuts and Image Segmentation}},
volume = {22},
year = {2000}
}
@inproceedings{meek2003uai,
booktitle = {UAI},
editor = {Meek, Christopher and Kj{\ae}rulff, Uffe},
isbn = {0-127-05664-5},
publisher = {Morgan Kaufmann},
title = {{UAI '03, Proceedings of the 19th Conference in Uncertainty in Artificial Intelligence, August 7-10 2003, Acapulco, Mexico}},
year = {2003}
}
@article{Bull2015b,
author = {Bull, Adam D.},
issn = {1350-7265},
journal = {Bernoulli},
keywords = {bandits on taxonomies,continuum-armed bandits,noisy global optimisation,tree-armed bandits,zooming dimension},
month = nov,
number = {4},
pages = {2289--2307},
publisher = {Bernoulli Society for Mathematical Statistics and Probability},
title = {{Adaptive-treed Bandits}},
url = {http://projecteuclid.org/euclid.bj/1438777594},
volume = {21},
year = {2015}
}
@inproceedings{audiffren2015maximum,
abstract = {A popular approach to apprenticeship learning (AL) is to formulate it as an inverse reinforcement learning (IRL) problem. The MaxEnt-IRL algorithm successfully integrates the maximum entropy principle into IRL and unlike its predecessors, it resolves the ambiguity arising from the fact that a possibly large number of policies could match the expert's behavior. In this paper, we study an AL setting in which in addition to the expert's trajectories, a number of unsupervised trajectories is available. We introduce MESSI, a novel algorithm that combines MaxEnt-IRL with principles coming from semi-supervised learning. In particular, MESSI integrates the unsupervised data into the MaxEnt-IRL framework using a pairwise penalty on trajectories. Empirical results in a highway driving and grid-world problems indicate that MESSI is able to take advantage of the unsupervised trajectories and improve the performance of MaxEnt-IRL.},
author = {Audiffren, Julien and Valko, Michal and Lazaric, Alessandro and Ghavamzadeh, Mohammad},
booktitle = {International Joint Conferences on Artificial Intelligence},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Audiffren et al. - 2015 - MESSI Maximum entropy semi-supervised inverse reinforcement learning.pdf:pdf},
title = {{MESSI: Maximum entropy semi-supervised inverse reinforcement learning}},
year = {2015}
}
@inproceedings{jebara2009graph,
author = {Jebara, Tony and Wang, Jun and Chang, Shih-Fu},
booktitle = {Proceedings of ICML},
pages = {441--448},
title = {{Graph construction and b-matching for semi-supervised learning}},
year = {2009}
}
@inproceedings{sanner2005approximate,
author = {Sanner, Scott and Boutilier, Craig},
booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence},
title = {{Approximate Linear Programming for First-Order {MDPs}}},
year = {2005}
}
@article{blei2005variational,
annote = {c{\_}omps{\_}models},
author = {Blei, David M and Jordan, Michael I},
journal = {Bayesian Analysis},
pages = {2006},
title = {{Variational inference for Dirichlet process mixtures}},
url = {http://stat-www.berkeley.edu/tech-reports/674.pdf},
volume = {1},
year = {2005}
}
@inproceedings{farajtabar2016multistage,
abstract = {We consider the problem of how to optimize multi-stage campaigning over social networks. The dynamic programming framework is employed to balance the high present reward and large penalty on low future outcome in the presence of extensive uncertainties. In particular, we establish theoretical foundations of optimal campaigning over social networks where the user activities are modeled as a multivariate Hawkes process, and we derive a time dependent linear relation between the intensity of exogenous events and several commonly used objective functions of campaigning. We further develop a convex dynamic programming framework for determining the optimal intervention policy that prescribes the required level of external drive at each stage for the desired campaigning result. Experiments on both synthetic data and the real-world MemeTracker dataset show that our algorithm can steer the user activities for optimal campaigning much more accurately than baselines.},
author = {Farajtabar, Mehrdad and Ye, Xiaojing and Harati, Sahar and Song, Le and Zha, Hongyuan},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Farajtabar et al. - 2016 - Multistage Campaigning in Social Networks.pdf:pdf},
title = {{Multistage campaigning in social networks}},
year = {2016}
}
@book{jeffreys1988methods,
address = {Cambridge, United Kingdom},
author = {Jeffreys, Harold and Jeffreys, Bertha},
publisher = {Cambridge University Press},
title = {{Methods of Mathematical Physics}},
year = {1988}
}
@inproceedings{ashkan14diversified,
author = {Ashkan, Azin and Kveton, Branislav and Berkovsky, Shlomo and Wen, Zheng},
booktitle = {Conference on Recommender Systems},
title = {{Diversified utility maximization for recommendations}},
year = {2014}
}
@article{RM51,
author = {Robbins, Herbert and Monro, Sutton},
journal = {Annals of Mathematical Statistics},
pages = {400--407},
title = {{A stochastic approximation method}},
volume = {22},
year = {1951}
}
@article{GLS01,
author = {Grove, A and Littlestone, N and Schuurmans, D},
journal = {Machine Learning},
pages = {173--210},
title = {{General convergence results for linear discriminant updates}},
volume = {43},
year = {2001}
}
@incollection{grant2006disciplined,
author = {Grant, Michael and Boyd, Stephen and Ye, Yinyu},
booktitle = {Global Optimization: From Theory to Implementation},
doi = {10.1007/0-387-30528-9_7},
editor = {Liberti, Leo and Maculan, Nelson},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Grant, Boyd, Ye - 2006 - Disciplined Convex Programming and CVX.pdf:pdf},
isbn = {9780387282602},
pages = {155--210},
publisher = {Springer},
series = {Nonconvex Optimization and Its Applications},
title = {{Disciplined Convex Programming}},
url = {http://www.springerlink.com/content/p00314m582n01743/},
year = {2006}
}
@phdthesis{ortiz2002selecting,
author = {Ortiz, Luis},
school = {Brown University},
title = {{Selecting Approximately-Optimal Actions in Complex Structured Domains}},
year = {2002}
}
@techreport{Nesterov2007,
author = {Nesterov, Y},
institution = {Center for Operations Research and Econometrics (CORE), Catholic University of Louvain},
title = {{Gradient methods for minimizing composite objective function}},
year = {2007}
}
@book{DeLu2001book,
author = {Devroye, L and Lugosi, G},
publisher = {Springer},
title = {{Combinatorial methods in density estimation}},
year = {2001}
}
@inproceedings{park2001approximating,
author = {Park, James and Darwiche, Adnan},
booktitle = {Proceedings of the 17th Conference on Uncertainty in Artificial Intelligence},
pages = {403--410},
title = {{Approximating {MAP} Using Local Search}},
year = {2001}
}
@article{goldengorin1999data,
author = {Goldengorin, B and Sierksma, G and Tijssen, G A and Tso, M},
journal = {Management Science},
pages = {1539--1551},
publisher = {JSTOR},
title = {{The data-correcting algorithm for the minimization of supermodular functions}},
year = {1999}
}
@inproceedings{valko2010feature,
abstract = {The objective of this paper is to understand what characteristics
and features of clinical data influence physician's decision about
ordering laboratory tests or prescribing medications the most. We
conduct our analysis on data and decisions extracted from electronic
health records of 4486 post-surgical cardiac patients. The summary
statistics for 335 different lab order decisions and 407 medication
decisions are reported. We show that in many cases, physician's lab-order
and medication decisions are predicted well by simple patterns such
as last value of a single test result, time since a certain lab test
was ordered or time since certain procedure was executed.},
author = {Valko, Michal and Hauskrecht, Milos},
booktitle = {13th International Congress on Medical Informatics MEDINFO 2010},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{Feature importance analysis for patient management decisions}},
year = {2010}
}
@book{AGZ10,
author = {Anderson, G and Guionnet, A and Zeitouni, O},
publisher = {Cambridge University Press},
title = {{An Introduction to Random Matrices}},
year = {2010}
}
@book{good1994permutation,
author = {Good, P},
publisher = {Springer-Verlag},
title = {{Permutation Tests: A Practical Guide to Resampling Methods for Testing Hypothesis}},
year = {1994}
}
@article{Becker2009,
author = {Becker, S and Bobin, J and Candes, E},
journal = {SIAM J. on Imaging Sciences},
number = {1},
pages = {1--39},
title = {{NESTA: A Fast and Accurate First-order Method for Sparse Recovery}},
volume = {4},
year = {2011}
}
@inproceedings{dhiman2006dynamic,
author = {Dhiman, Gaurav and Simunic, Tajana},
booktitle = {Proceedings of the 2006 IEEE / ACM International Conference on Computer-Aided Design},
title = {{Dynamic Power Management Using Machine Learning}},
year = {2006}
}
@inproceedings{levine2012continuous,
author = {Levine, Sergey and Koltun, Vladlen},
booktitle = {ICML '12: Proceedings of the 29th International Conference on Machine Learning},
title = {{Continuous Inverse Optimal Control with Locally Optimal Examples}},
url = {http://graphics.stanford.edu/projects/cioc/cioc.pdf},
year = {2012}
}
@techreport{pachocki2016analysis,
abstract = {We show that schemes for sparsifying matrices based on iteratively resampling rows yield guarantees matching classic 'offline' sparsifiers (see e.g. Spielman and Srivastava [STOC 2008]). In particular, this gives a formal analysis of a scheme very similar to the one proposed by Kelner and Levin [TCS 2013].},
archivePrefix = {arXiv},
arxivId = {1605.08194},
author = {Pachocki, Jakub},
eprint = {1605.08194},
title = {{Analysis of resparsification}},
url = {http://arxiv.org/abs/1605.08194},
year = {2016}
}
@inproceedings{valko2008learning,
abstract = {Multiple technologies that measure expression levels of protein mixtures in the human body offer a potential for detection and understanding the disease. The recent increase of these technologies prompts researchers to evaluate the individual and combined utility of data generated by the technologies. In this work, we study two data sources to measure the expression of protein mixtures in the human body: whole-sample MS profiling and multiplexed protein arrays. We investigate the individual and combined utility of these technologies by learning and testing a variety of classification models on the data from a pancreatic cancer study. We show that for the combination of these two (heterogeneous) datasets, classification models that work well on one of them individually fail on the combination of the two datasets. We study and propose a class of model fusion methods that acknowledge the differences and try to reap most of the benefits from their combination.},
author = {Valko, Michal and Pelikan, Richard and Hauskrecht, Milos},
booktitle = {AMIA Summit on Translational Bioinformatics},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = {mar},
title = {{Learning predictive models for multiple heterogeneous proteomic datasources}},
year = {2008}
}
@article{ashkan15optimal,
author = {Ashkan, Azin and Kveton, Branislav and Berkovsky, Shlomo and Wen, Zheng},
journal = {International Joint Conferences on Artificial Intelligence},
title = {{Optimal greedy diversity for recommendation}},
year = {2015}
}
@article{gyorgy04efficient,
author = {Gy{\"{o}}rgy, A and Linder, T and Lugosi, G},
journal = {IEEE Transactions on Signal Processing},
pages = {2337--2347},
title = {{Efficient Adaptive Algorithms and Minimax Bounds for Zero-Delay Lossy Source Coding}},
volume = {52},
year = {2004}
}
@article{groenevelt1991two,
author = {Groenevelt, H},
journal = {European Journal of Operational Research},
number = {2},
pages = {227--236},
publisher = {Elsevier},
title = {{Two algorithms for maximizing a separable concave function over a polymatroid feasible region}},
volume = {54},
year = {1991}
}
@mastersthesis{shiraev03,
author = {Shiraev, Dmitry Eric},
keywords = {Computer Science},
school = {Virginia Polytechnic Institute and State University},
title = {{Inverse Reinforcement Learning and Routing Metric Discovery}},
url = {http://scholar.lib.vt.edu/theses/available/etd-08242003-224906/},
year = {2003}
}
@book{gelman2004bayesian,
author = {Gelman, A},
publisher = {CRC press},
title = {{Bayesian data analysis}},
year = {2004}
}
@incollection{Haz11,
author = {Hazan, E},
booktitle = {Optimization for Machine Learning},
editor = {Sra, S and Nowozin, S and Wright, S},
pages = {287--303},
publisher = {MIT press},
title = {{The convex optimization approach to regret minimization}},
year = {2011}
}
@article{variant2011shamir,
author = {Shamir, Ohad},
journal = {CoRR},
title = {{A Variant of Azuma's Inequality for Martingales with Subgaussian Tails}},
volume = {abs/1110.2392},
year = {2011}
}
@misc{kaggle2013,
title = {{Kaggle}},
url = {https://www.kaggle.com/},
year = {2013}
}
@inproceedings{kalenon,
author = {Kale, Satyen and Reyzin, Lev and Schapire, Robert E},
booktitle = {Neural Information Processing Systems},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1054--1062},
title = {{Non-Stochastic Bandit Slate Problems}},
year = {2010}
}
@article{gans2005medical,
abstract = {We surveyed a nationally representative sample of medical group practices
to assess their current use of information technology (IT). Our results
suggest that adoption of electronic health records (EHRs) is progressing
slowly, at least in smaller practices, although a number of group
practices plan to implement an EHR within the next two years. Moreover,
the process of choosing and implementing an EHR appears to be more
complex and varied than we expected. This suggests a need for greater
support for practices, particularly smaller ones, in this quest if
the benefits expected from EHRs are to be realized.},
author = {Gans, David and Kralewski, John and Hammons, Terry and Dowd, Bryan},
doi = {10.1377/hlthaff.24.5.1323},
institution = {Medical Group Management Association, Englewood, Colorado, USA. dng@mgma.com},
journal = {Health Aff (Millwood)},
keywords = {Computerized,Data Collection; Diffusion of Innovation; Informat,Medical; United States,utilization; Medical Records Systems,utilization; Practice Management},
number = {5},
pages = {1323--1333},
pmid = {16162580},
title = {{Medical groups' adoption of electronic health records and information systems.}},
url = {http://dx.doi.org/10.1377/hlthaff.24.5.1323},
volume = {24},
year = {2005}
}
@inproceedings{neu2007apprenticeship,
abstract = {In this paper we propose a novel gradient algorithm to learn a policy from an expert's observed behavior assuming that the expert behaves optimally with respect to some unknown reward function of a Markovian Decision Problem. The algorithm's aim is to find a reward function such that the resulting optimal policy matches well the expert's observed behavior. The main difficulty is that the mapping from the parameters to policies is both nonsmooth and highly redundant. Resorting to subdifferentials solves the first difficulty, while the second one is overcome by computing natural gradients. We tested th eproposed method in two artificial domains and found it to be more reliable and efficient than some previous methods.},
author = {Neu, Gergely and Szepesv{\'{a}}ri, C},
booktitle = {Proceedings of the Twenty-Third Conference Annual Conference on Uncertainty in Artificial Intelligence (UAI-07)},
isbn = {0-9749039-3-0},
pages = {295--302},
title = {{Apprenticeship learning using inverse reinforcement learning and gradient methods}},
url = {http://arxiv.org/abs/1206.5264},
year = {2007}
}
@article{neu09parsers,
author = {Neu, Gergely and Szepesv{\'{a}}ri, Csaba},
journal = {Machine Learning Journal},
number = {2},
pages = {303--337},
title = {{Training parsers by inverse reinforcement learning}},
volume = {77},
year = {2009}
}
@article{yedidia2005constructing,
annote = {comps{\_}models},
author = {Yedidia, J S and Freeman, W T and Weiss, Y},
doi = {10.1109/TIT.2005.850085},
issn = {0018-9448},
journal = {Information Theory, IEEE Transactions on},
keywords = {GBP algorithm,Kikuchi free energy,backpropagation,belief networks,cluster variation method,factor graphs,free energy approximation,generalized belief propagation,graph theory,inference mechanisms,inference problem,junction graph method,message passing,message passing Bethe approximation,region graph method,sum-product algorithm},
month = {jul},
number = {7},
pages = {2282--2312},
title = {{Constructing free-energy approximations and generalized belief propagation algorithms}},
volume = {51},
year = {2005}
}
@article{andrieu2003introduction,
abstract = {This purpose of this introductory paper is threefold. First, it introduces the Monte Carlo method with emphasis on probabilistic machine learning. Second, it reviews the main building blocks of modern Markov chain Monte Carlo simulation, thereby providing and introduction to the remaining papers of this special issue. Lastly, it discusses new interesting research horizons.},
author = {Andrieu, Christophe and {De Freitas}, Nando and Doucet, Arnaud and Jordan, Michael I.},
journal = {Machine Learning},
keywords = {MCMC,Markov chain Monte Carlo,Sampling,Stochastic algorithms},
number = {1-2},
pages = {5--43},
title = {{An introduction to MCMC for machine learning}},
volume = {50},
year = {2003}
}
@inproceedings{schultz2003learning,
annote = {comps{\_}distance},
author = {Schultz, Matthew and Joachims, Thorsten},
booktitle = {Neural Information Processing Systems},
publisher = {MIT Press},
title = {{Learning a distance metric from relative comparisons}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.4.1616},
year = {2003}
}
@inproceedings{leskovec2005graphs,
author = {Leskovec, Jure and Kleinberg, Jon and Faloutsos, Christos},
booktitle = {Proceedings of KDD'05},
organization = {ACM},
pages = {177--187},
title = {{Graphs over time: densification laws, shrinking diameters and possible explanations}},
year = {2005}
}
@article{littlestone1994weighted,
author = {Littlestone, Nick and Warmuth, Manfred},
journal = {Information and Computation},
number = {2},
pages = {212--261},
title = {{The weighted majority algorithm}},
volume = {108},
year = {1994}
}
@article{gine2010confidence,
author = {Gin{\'{e}}, Evarist and Nickl, Richard},
journal = {The Annals of Statistics},
number = {2},
pages = {1122--1170},
title = {{Confidence bands in density estimation}},
volume = {38},
year = {2010}
}
@inproceedings{edmonds,
author = {Edmonds, J},
booktitle = {Combinatorial optimization - Eureka, you shrink!},
pages = {11--26},
publisher = {Springer},
title = {{Submodular functions, matroids, and certain polyhedra}},
year = {2003}
}
@incollection{altun2006maximum,
address = {Cambridge, MA},
author = {Altun, Yasemin and McAllester, David and Belkin, Mikhail},
booktitle = {Advances in Neural Information Processing Systems 18},
editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J},
pages = {33--40},
publisher = {MIT Press},
title = {{Maximum Margin Semi-Supervised Learning for Structured Variables}},
year = {2006}
}
Removed a duplicate entry that reused the citation key andrieu2003introduction;
the earlier, more complete entry for the same work (Andrieu, de Freitas, Doucet,
Jordan, "An introduction to MCMC for machine learning", Machine Learning 50,
2003) is kept above. Duplicate keys cause BibTeX "repeated entry" errors.
@inproceedings{mahadevan2006learning,
author = {Mahadevan, Sridhar and Maggioni, Mauro and Ferguson, Kimberly and Osentoski, Sarah},
booktitle = {Proceedings of the 21st National Conference on Artificial Intelligence},
title = {{Learning Representation and Control in Continuous {Markov} Decision Processes}},
year = {2006}
}
@inproceedings{carpentier2015simple,
abstract = {We consider a stochastic bandit problem with infinitely many arms. In this setting, the learner has no chance of trying all the arms even once and has to dedicate its limited number of samples only to a certain number of arms. All previous algorithms for this setting were designed for minimizing the cumulative regret of the learner. In this paper, we propose an algorithm aiming at minimizing the simple regret. As in the cumulative regret setting of infinitely many armed bandits, the rate of the simple regret will depend on a parameter {\$}\backslashbeta{\$} characterizing the distribution of the near-optimal arms. We prove that depending on {\$}\backslashbeta{\$}, our algorithm is minimax optimal either up to a multiplicative constant or up to a {\$}\backslashlog(n){\$} factor. We also provide extensions to several important cases: when {\$}\backslashbeta{\$} is unknown, in a natural setting where the near-optimal arms have a small variance, and in the case of unknown time horizon.},
author = {Carpentier, Alexandra and Valko, Michal},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2015 - Simple regret for infinitely many armed bandits.pdf:pdf},
title = {{Simple regret for infinitely many armed bandits}},
year = {2015}
}
@inproceedings{klein03accurate,
author = {Klein, D and Manning, C D},
booktitle = {ACL '03: Proceedings of the 41st Annual Meeting of the Association for Computational Linguistics},
pages = {423--430},
title = {{Accurate Unlexicalized Parsing}},
year = {2003}
}
@article{metropolis1953equation,
author = {Metropolis, Nicholas and Rosenbluth, Arianna and Rosenbluth, Marshall and Teller, Augusta and Teller, Edward},
journal = {Journal of Chemical Physics},
pages = {1087--1092},
title = {{Equation of State Calculations by Fast Computing Machines}},
volume = {21},
year = {1953}
}
@article{hazel2000multivariate,
annote = {comps{\_}anX},
author = {Hazel, G G},
doi = {10.1109/36.843012},
issn = {0196-2892},
journal = {Geoscience and Remote Sensing, IEEE Transactions on},
keywords = {Markov processes,anomaly detection,first-order isotropic texture model,geophysical measurement technique,geophysical signal processing,geophysical techniques,image processing,image segmentation,image texture,joint spatial-spectral modeling,land surface,multidimensional signal processing,multispectral imagery,multispectral scene segmentation,multivariate Gaussian MRF,multivariate method,receiver operating characteristic,remote sensing,terrain mapping,terrain mappingGaussian Markov random field textu,vector observations},
month = {may},
number = {3},
pages = {1199--1211},
title = {{Multivariate Gaussian MRF for multispectral scene segmentation and anomaly detection}},
volume = {38},
year = {2000}
}
@techreport{wen2016influence,
abstract = {We study a stochastic online problem of learning to influence in a social network with semi-bandit feedback, individual observations of how influenced users influence others. Our problem combines challenges of partial monitoring, because the learning agent only observes the influenced portion of the network, and combinatorial bandits, because the cardinality of the feasible set is exponential in the maximum number of influencers. We propose a computationally efficient UCB-like algorithm for solving our problem, IMLinUCB, and analyze it on forests. Our regret bounds are polynomial in all quantities of interest; reflect the structure of the network; and do not depend on inherently large quantities, such as the reciprocal of the minimum probability of being influenced and the cardinality of the action set. To the best of our knowledge, these are the first such results. IMLinUCB permits linear generalization and therefore is suitable for large-scale problems. We evaluate IMLinUCB on several synthetic problems and observe that the regret of IMLinUCB scales as suggested by our upper bounds. A special form of our problem can be viewed as a linear bandit and we match the regret bounds of LinUCB in this case.},
archivePrefix = {arXiv},
arxivId = {1605.06593},
author = {Wen, Zheng and Kveton, Branislav and Valko, Michal},
eprint = {1605.06593},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Wen, Kveton, Valko - 2016 - Influence maximization with semi-bandit feedback.pdf:pdf},
title = {{Influence maximization with semi-bandit feedback}},
year = {2016}
}
@phdthesis{daume06thesis,
address = {Los Angeles, CA},
author = {{Daum{\'{e}} III}, Hal},
school = {University of Southern California},
title = {{Practical Structured Learning Techniques for Natural Language Processing}},
url = {http://pub.hal3.name/{\#}daume06thesis},
year = {2006}
}
@inproceedings{kveton2014matroid,
author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Eydgahi, Hoda and Eriksson, Brian},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Matroid bandits: Fast combinatorial optimization with learning}},
year = {2014}
}
@inproceedings{neu2015explore,
author = {Neu, Gergely},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Neu - 2015 - Explore no more Improved high-probability regret bounds for non-stochastic bandits.pdf:pdf},
title = {{Explore no more: Improved high-probability regret bounds for non-stochastic bandits}},
year = {2015}
}
@article{rust1997using,
author = {Rust, John},
journal = {Econometrica},
number = {3},
pages = {487--516},
title = {{Using Randomization to Break the Curse of Dimensionality}},
volume = {65},
year = {1997}
}
@article{kelner2012spectral,
author = {Kelner, Jonathan A. and Levin, Alex},
journal = {Theory of Computing Systems},
number = {2},
pages = {243--262},
title = {{Spectral sparsification in the semi-streaming setting}},
volume = {53},
year = {2012}
}
@article{boros2002pseudo,
author = {Boros, E and Hammer, P L},
journal = {Discrete Applied Mathematics},
number = {1-3},
pages = {155--225},
publisher = {Elsevier},
title = {{Pseudo-{Boolean} optimization}},
volume = {123},
year = {2002}
}
@book{Cao07,
address = {New York},
author = {Cao, Xi-Ren},
publisher = {Springer},
title = {{Stochastic Learning and Optimization: A Sensitivity-Based Approach}},
year = {2007}
}
@article{megiddo1974optimal,
author = {Megiddo, N},
journal = {Mathematical Programming},
number = {1},
pages = {97--107},
publisher = {Springer},
title = {{Optimal flows in networks with multiple sources and sinks}},
volume = {7},
year = {1974}
}
@article{koutis2011combinatorial,
abstract = {Several algorithms for problems including image segmentation, gradient inpainting and total variation are based on solving symmetric diagonally dominant (SDD) linear systems. These algorithms generally produce results of high quality. However, existing solvers are not always efficient, and in many cases they operate only on restricted topologies. The unavailability of reliably efficient solvers has arguably hindered the adoptability of approaches and algorithms based on SDD systems, especially in applications involving very large systems. A central claim of this paper is that SDD-based approaches can now be considered practical and reliable. To support our claim we present Combinatorial Multigrid (CMG), the first reliably efficient SDD solver that tackles problems in general and arbitrary weighted topologies. The solver borrows the structure and operators of multigrid algorithms, but embeds into them powerful and algebraically sound combinatorial preconditioners, based on novel tools from support graph theory. In order to present the derivation of CMG, we review and exemplify key notions of support graph theory that can also guide the future development of specialized solvers. We validate our claims on very large systems derived from imaging applications. Finally, we outline two new reductions of non-linear filtering problems to SDD systems and review the integration of SDD systems into selected algorithms. {\textcopyright} 2011 Elsevier Inc. All rights reserved.},
author = {Koutis, Ioannis and Miller, Gary L. and Tolliver, David},
journal = {Computer Vision and Image Understanding},
number = {12},
pages = {1638--1646},
title = {{Combinatorial preconditioners and multilevel solvers for problems in computer vision and image processing}},
volume = {115},
year = {2011}
}
@article{hazan2007logarithmic,
author = {Hazan, Elad and Agarwal, Amit and Kale, Satyen},
journal = {Machine Learning},
number = {2-3},
pages = {169--192},
title = {{Logarithmic Regret Algorithms for Online Convex Optimization}},
volume = {69},
year = {2007}
}
@phdthesis{das2009detecting,
author = {Das, Kaustav},
school = {Carnegie Mellon University},
title = {{Detecting Patterns of Anomalies}},
url = {http://www.cs.cmu.edu/{~}kaustav/thesis/kaustav{\_}thesis.pdf},
year = {2009}
}
@article{boutilier1999decision-theoretic,
author = {Boutilier, Craig and Dean, Thomas and Hanks, Steve},
journal = {Journal of Artificial Intelligence Research},
pages = {1--94},
title = {{Decision-Theoretic Planning: Structural Assumptions and Computational Leverage}},
volume = {11},
year = {1999}
}
@book{strongin2000global,
author = {Strongin, Roman and Sergeyev, Yaroslav},
isbn = {9780792364900},
publisher = {Springer},
series = {Nonconvex Optimization and Its Applications},
title = {{Global Optimization with Non-Convex Constraints: Sequential and Parallel Algorithms}},
url = {http://books.google.fr/books?id=xh{\_}GF9Dor3AC},
year = {2000}
}
@incollection{JN11a,
author = {Juditsky, A and Nemirovski, A},
booktitle = {Optimization for Machine Learning},
editor = {Sra, S and Nowozin, S and Wright, S},
pages = {121--147},
publisher = {MIT press},
title = {{First-Order Methods for Nonsmooth Convex Large-Scale Optimization, I: General Purpose Methods}},
year = {2011}
}
@article{chambolle2009total,
author = {Chambolle, A and Darbon, J},
journal = {International Journal of Computer Vision},
number = {3},
pages = {288--307},
publisher = {Springer},
title = {{On total variation minimization and surface evolution using parametric maximum flows}},
volume = {84},
year = {2009}
}
@inproceedings{AweKlein04,
author = {Awerbuch, Baruch and Kleinberg, Robert D},
booktitle = {Proceedings of the 36th Annual ACM Symposium on Theory of Computing},
pages = {45--53},
title = {{Adaptive routing with end-to-end feedback: distributed learning and geometric approaches}},
year = {2004}
}
@article{fot,
annote = {To appear},
author = {Bach, F and Jenatton, R and Mairal, J and Obozinski, G},
journal = {Foundations and Trends{\textregistered} in Machine Learning},
publisher = {Now Publishers Inc.},
title = {{Optimization with sparsity-inducing penalties}},
year = {2011}
}
@inproceedings{HuPo04,
author = {Hutter, Marcus and Poland, Jan},
booktitle = {Algorithmic Learning Theory},
pages = {279--293},
title = {{Prediction with Expert Advice by Following the Perturbed Leader for General Weights}},
year = {2004}
}
@article{khachiyan1979polynomial,
author = {Khachiyan, Leonid},
journal = {Doklady Akademii Nauk SSSR},
pages = {1093--1096},
title = {{A Polynomial Algorithm in Linear Programming}},
volume = {244},
year = {1979}
}
@inproceedings{park2003solving,
author = {Park, James and Darwiche, Adnan},
booktitle = {Proceedings of the 19th Conference on Uncertainty in Artificial Intelligence},
pages = {459--468},
title = {{Solving {MAP} Exactly Using Systematic Search}},
year = {2003}
}
@article{girvan2002community,
abstract = {A number of recent studies have focused on the statistical properties of networked systems such as social networks and the Worldwide Web. Researchers have concentrated particularly on a few properties that seem to be common to many networks: the small-world property, power-law degree distributions, and network transitivity. In this article, we highlight another property that is found in many networks, the property of community structure, in which network nodes are joined together in tightly knit groups, between which there are only looser connections. We propose a method for detecting such communities, built around the idea of using centrality indices to find community boundaries. We test our method on computer-generated and real-world graphs whose community structure is already known and find that the method detects this known structure with high sensitivity and reliability. We also apply the method to two networks whose community structure is not well known--a collaboration network and a food web--and find that it detects significant and informative community divisions in both cases.},
author = {Girvan, Michelle and Newman, Mark E J},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Girvan, Newman - 2002 - Community structure in social and biological networks.pdf:pdf},
journal = {Proceedings of the National Academy of Sciences of the United States of America},
keywords = {Algorithms,Animals,Community Networks,Computer Simulation,Humans,Models,Nerve Net,Nerve Net: physiology,Neural Networks (Computer),Social Behavior,Theoretical},
number = {12},
pages = {7821--7826},
title = {{Community structure in social and biological networks.}},
volume = {99},
year = {2002}
}
@inproceedings{drineas2005nystrom,
author = {Drineas, P and Mahoney, M W},
booktitle = {Proceedings of COLT, 2005},
title = {{On the {N}ystr{\"{o}}m method for approximating a {G}ram matrix for improved kernel-based learning}},
year = {2005}
}
@article{gale2003bayesian,
abstract = {We extend the standard model of social learning in two ways. First, we introduce a social network and assume that agents can only observe the actions of agents to whom they are connected by this network. Secondly, we allow agents to choose a different action at each date. If the network satisfies a connectedness assumption, the initial diversity resulting from diverse private information is eventually replaced by uniformity of actions, though not necessarily of beliefs, in finite time with probability one. We look at particular networks to illustrate the impact of network architecture on speed of convergence and the optimality of absorbing states. Convergence is remarkably rapid, so that asymptotic results are a good approximation even in the medium run. {\textcopyright} 2003 Elsevier Inc. All rights reserved.},
author = {Gale, Douglas and Kariv, Shachar},
journal = {Games and Economic Behavior},
number = {2},
pages = {329--346},
title = {{Bayesian learning in social networks}},
volume = {45},
year = {2003}
}
@book{davey2002introduction,
author = {Davey, B A and Priestley, H A},
publisher = {Cambridge Univ. Press},
title = {{Introduction to Lattices and Order}},
year = {2002}
}
@inproceedings{bartlett2009regal,
address = {Arlington, Virginia, United States},
author = {Bartlett, Peter L and Tewari, Ambuj},
booktitle = {Proceedings of the 25th conference on Uncertainty in Artificial Intelligence},
isbn = {978-0-9749039-5-8},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {35--42},
publisher = {AUAI Press},
series = {UAI '09},
title = {{REGAL: a regularization based algorithm for reinforcement learning in weakly communicating MDPs}},
year = {2009}
}
@inproceedings{bilenko2004integrating,
address = {New York, NY, USA},
annote = {comps{\_}distancX},
author = {Bilenko, Mikhail and Basu, Sugato and Mooney, Raymond J},
booktitle = {ICML '04: Proceedings of the twenty-first international conference on Machine learning},
doi = {10.1145/1015330.1015360},
isbn = {1-58113-828-5},
pages = {11},
publisher = {ACM},
title = {{Integrating constraints and metric learning in semi-supervised clustering}},
year = {2004}
}
@techreport{asadi2016new,
abstract = {A softmax operator applied to a set of values acts somewhat like the maximization function and somewhat like an average. In sequential decision making, softmax is often used in settings where it is necessary to maximize utility but also to hedge against problems that arise from putting all of one's weight behind a single maximum utility decision. The Boltzmann softmax operator is the most commonly used softmax operator in this setting, but we show that this operator is prone to misbehavior. In this work, we study an alternative softmax operator that, among other properties, is both a non-expansion (ensuring convergent behavior in learning and planning) and differentiable (making it possible to improve decisions via gradient descent methods). We provide proofs of these properties and present empirical comparisons between various softmax operators.},
archivePrefix = {arXiv},
arxivId = {1612.05628},
author = {Asadi, Kavosh and Littman, Michael L.},
eprint = {1612.05628},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Asadi, Littman - 2016 - A new softmax operator for reinforcement learning.pdf:pdf},
title = {{A new softmax operator for reinforcement learning}},
url = {http://arxiv.org/abs/1612.05628},
year = {2016}
}
@article{BT03,
author = {Beck, A and Teboulle, M},
journal = {Operations Research Letters},
number = {3},
pages = {167--175},
title = {{Mirror descent and nonlinear projected subgradient methods for convex optimization}},
volume = {31},
year = {2003}
}
@inproceedings{propp1998coupling,
abstract = {The Markov chain Monte Carlo method is a general technique for obtaining samples from a probability distribution. In earlier work, we showed that for many applications one can modify the Markov chain Monte Carlo method so as to remove all bias in the output resulting from the biased choice of an initial state for the chain; we have called this method coupling from the past (CFTP). Here we describe this method in a fashion that should make our ideas accessible to researchers from diverse areas. Our expository strategy is to avoid proofs and focus on sample applications.},
author = {Propp, James and Wilson, David},
booktitle = {Microsurveys in Discrete Probability},
title = {{Coupling from the past: A user's guide}},
year = {1998}
}
@book{Brent1973,
abstract = {Outstanding text for graduate students and research workers proposes improvements to existing algorithms, extends their related mathematical theories, and offers details on new algorithms for approximating local and global minima. Many numerical examples, along with complete analysis of rate of convergence for most of the algorithms and error bounds that allow for the effect of rounding errors.},
author = {Brent, R P},
address = {Englewood Cliffs, NJ},
chapter = {4},
isbn = {0486419983},
pages = {195},
publisher = {Prentice-Hall},
series = {Prentice-Hall series in automatic computation},
title = {{Algorithms for minimization without derivatives}},
url = {http://www.cs.ox.ac.uk/people/richard.brent/pd/rpb011a.pdf},
year = {1973}
}
@inproceedings{Pomerleau_1989_2055,
author = {Pomerleau, Dean},
booktitle = {Advances in Neural Information Processing Systems 1},
pages = {305--313},
title = {{{ALVINN}: An Autonomous Land Vehicle in a Neural Network}},
year = {1989}
}
@inproceedings{talwalkar2008large-scale,
author = {Talwalkar, Ameet and Kumar, Sanjiv and Rowley, Henry A},
booktitle = {Computer Vision and Pattern Recognition (CVPR)},
title = {{Large-Scale Manifold Learning}},
year = {2008}
}
@article{hary12cooperation,
author = {Hartmann, B and D{\'{a}}n, A},
journal = {IEEE Transactions on Sustainable Energy},
number = {1},
pages = {49--56},
title = {{Cooperation of a grid-connected wind farm and an energy storage unit --- demonstration of a simulation tool}},
volume = {3},
year = {2012}
}
@article{gine2010adaptive,
author = {Gin{\'{e}}, Evarist and Nickl, Richard},
journal = {Bernoulli},
number = {4},
pages = {1137--1163},
title = {{Adaptive estimation of a distribution function and its density in sup-norm loss by wavelet and spline projections}},
volume = {16},
year = {2010}
}
@article{yu09ArbitraryRewards,
address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
author = {Yu, Jia Yuan and Mannor, Shie and Shimkin, Nahum},
doi = {10.1287/moor.1090.0397},
issn = {0364-765X},
journal = {Mathematics of Operations Research},
number = {3},
pages = {737--757},
publisher = {INFORMS},
title = {{{M}arkov Decision Processes with Arbitrary Reward Processes}},
volume = {34},
year = {2009}
}
@inproceedings{rubin2005auctioning,
address = {New York, NY, USA},
author = {Rubin, Shai and Christodorescu, Mihai and Ganapathy, Vinod and Giffin, Jonathon T and Kruger, Louis and Wang, Hao and Kidd, Nicholas},
booktitle = {Proceedings of the 12th ACM conference on Computer and communications security},
isbn = {1-59593-226-7},
keywords = {anomaly detection,auction,reputation system,shilling},
pages = {270--279},
publisher = {ACM},
series = {CCS '05},
title = {{An auctioning reputation system based on anomaly detection}},
year = {2005}
}
@article{tu2015bandit,
author = {Tu, Shi-Tao and Zhu, Juan-Lan},
journal = {Journal of Shanghai Jiaotong University (Science)},
month = {may},
number = {5},
pages = {535--539},
title = {{A bandit method using probabilistic matrix factorization in recommendation}},
volume = {20},
year = {2015}
}
@book{bertsimas1997introduction,
author = {Bertsimas, Dimitris and Tsitsiklis, John},
publisher = {Athena Scientific},
title = {{Introduction to linear optimization}},
year = {1997}
}
@article{schrijver2000combinatorial,
author = {Schrijver, A},
journal = {Journal of Combinatorial Theory, Series B},
number = {2},
pages = {346--355},
publisher = {Elsevier},
title = {{A combinatorial algorithm minimizing submodular functions in strongly polynomial time}},
volume = {80},
year = {2000}
}
@incollection{combettes2010proximal,
author = {Combettes, P L and Pesquet, J C},
booktitle = {Fixed-Point Algorithms for Inverse Problems in Science and Engineering},
pages = {185--212},
publisher = {Springer},
title = {{Proximal Splitting Methods in Signal Processing}},
year = {2010}
}
@article{HW98,
author = {Herbster, M and Warmuth, M},
journal = {Machine Learning},
pages = {151--178},
title = {{Tracking the Best Expert}},
volume = {32},
year = {1998}
}
@inproceedings{narasimhan2007local,
author = {Narasimhan, M and Bilmes, J},
booktitle = {Proceedings of the 20th International Joint Conference on Artificial Intelligence},
title = {{Local search for balanced submodular clusterings}},
year = {2007}
}
@article{bolton2002statistical,
abstract = {Summary: Fraud is increasing dramatically with the expansion of modern
technology and the global superhighways of communication, resulting
in the loss of billions of dollars worldwide each year. Although
prevention technologies are the best way to reduce fraud, fraudsters
are adaptive and, given time, will usually find ways to circumvent
such measures. Methodologies for the detection of fraud are essential
if we are to catch fraudsters once fraud prevention has failed. Statistics
and machine learning provide effective technologies for fraud detection
and have been applied successfully to detect activities such as money
laundering, e-commerce credit card fraud, telecommunications fraud
and computer intrusion, to name but a few. We describe the tools
available for statistical fraud detection and the areas in which
fraud detection technologies are most used.},
annote = {comps{\_}ano},
author = {Bolton, Richard J and Hand, David J},
doi = {10.1214/ss/1042727940},
journal = {Stat. Sci.},
keywords = {fraud detection,fraud prevention,machine learnin},
number = {3},
pages = {235--255},
title = {{Statistical fraud detection: a review.}},
volume = {17},
year = {2002}
}
@inproceedings{zolghadr2013online,
author = {Zolghadr, Navid and Bartok, Gabor and Greiner, Russell and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesvari, Csaba},
booktitle = {Advances in Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Zolghadr et al. - 2013 - Online Learning with Costly Features and Labels.pdf:pdf},
pages = {1241--1249},
title = {{Online Learning with Costly Features and Labels}},
url = {http://papers.nips.cc/paper/5149-online-learning-with-costly-features-and-labels},
year = {2013}
}
@inproceedings{cesa-bianchi2016delay,
abstract = {We study networks of communicating learning agents that cooperate to solve a common nonstochastic bandit problem. Agents use an underlying communication network to get messages about actions selected by other agents, and drop messages that took more than {\$}d{\$} hops to arrive, where {\$}d{\$} is a delay parameter. We introduce $\backslash$textsc{\{}Exp3-Coop{\}}, a cooperative version of the {\{}$\backslash$sc Exp3{\}} algorithm and prove that with {\$}K{\$} actions and {\$}N{\$} agents the average per-agent regret after {\$}T{\$} rounds is at most of order {\$}\backslashsqrt{\{}\backslashbigl(d+1 + \backslashtfrac{\{}K{\}}{\{}N{\}}\backslashalpha{\_}{\{}\backslashle d{\}}\backslashbigr)(T\backslashln K){\}}{\$}, where {\$}\backslashalpha{\_}{\{}\backslashle d{\}}{\$} is the independence number of the {\$}d{\$}-th power of the connected communication graph {\$}G{\$}. We then show that for any connected graph, for {\$}d=\backslashsqrt{\{}K{\}}{\$} the regret bound is {\$}K{\^{}}{\{}1/4{\}}\backslashsqrt{\{}T{\}}{\$}, strictly better than the minimax regret {\$}\backslashsqrt{\{}KT{\}}{\$} for noncooperating agents. More informed choices of {\$}d{\$} lead to bounds which are arbitrarily close to the full information minimax regret {\$}\backslashsqrt{\{}T\backslashln K{\}}{\$} when {\$}G{\$} is dense. When {\$}G{\$} has sparse components, we show that a variant of $\backslash$textsc{\{}Exp3-Coop{\}}, allowing agents to choose their parameters according to their centrality in {\$}G{\$}, strictly improves the regret. Finally, as a by-product of our analysis, we provide the first characterization of the minimax regret for bandit learning with delay.},
author = {Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio and Mansour, Yishay and Minora, Alberto},
booktitle = {Conference on Learning Theory},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Cesa-Bianchi et al. - 2016 - Delay and cooperation in nonstochastic bandits.pdf:pdf},
title = {{Delay and cooperation in nonstochastic bandits}},
year = {2016}
}
@article{kolar2010estimating,
author = {Kolar, Mladen and Song, Le and Ahmed, Amr and Xing, Eric P},
doi = {10.1214/09-AOAS308},
journal = {Annals of Applied Statistics},
pages = {94--123},
title = {{Estimating time-varying networks}},
volume = {4},
year = {2010}
}
@inproceedings{charniak05ctf,
address = {Morristown, NJ, USA},
author = {Charniak, Eugene and Johnson, Mark},
booktitle = {ACL '05: Proceedings of the 43rd Annual Meeting of the Association for Computational Linguistics},
doi = {10.3115/1219840.1219862},
pages = {173--180},
publisher = {Association for Computational Linguistics},
title = {{Coarse-to-fine n-best parsing and {M}ax{E}nt discriminative reranking}},
year = {2005}
}
@inproceedings{engel2002sparse,
abstract = {We present a novel algorithm for sparse online greedy kernel- based nonlinear regression. This algorithm improves current approaches to kernel-based regression in two aspects. First, it operates online - at each time step it observes a single new input sample, performs an update and discards it. Second, the solution maintained is extremely sparse. This is achieved by an explicit greedy sparsification process that admits into the kernel representation a new input sample only if its feature space image is linearly independent of the images of previously admitted samples. We show that the algorithm implements a form of gradient ascent and demonstrate its scaling and noise tolerance properties on three benchmark regression problems.},
author = {Engel, Yaakov and Mannor, Shie and Meir, Ron},
booktitle = {European Conference on Machine Learning},
title = {{Sparse online greedy support vector regression}},
year = {2002}
}
@article{Martinet1978,
author = {Martinet, B},
journal = {ESAIM: Mathematical Modelling and Numerical Analysis - Mod{\'{e}}lisation Math{\'{e}}matique et Analyse Num{\'{e}}rique},
number = {2},
pages = {153--171},
publisher = {EDP Sciences},
title = {{Perturbation des m{\'{e}}thodes d'optimisation. Applications}},
url = {http://eudml.org/doc/193317},
volume = {12},
year = {1978}
}
@inproceedings{sutton2000policy,
author = {Sutton, Richard and McAllester, David and Singh, Satinder and Mansour, Yishay},
booktitle = {Advances in Neural Information Processing Systems 12},
pages = {1057--1063},
title = {{Policy Gradient Methods for Reinforcement Learning with Function Approximation}},
year = {2000}
}
@inproceedings{mahadevan2005samuel,
author = {Mahadevan, Sridhar},
booktitle = {Proceedings of the 20th National Conference on Artificial Intelligence},
pages = {1000--1005},
title = {{Samuel Meets {Amarel}: Automating Value Function Approximation Using Global State Space Analysis}},
year = {2005}
}
@article{sutton1988learning,
author = {Sutton, Richard},
journal = {Machine Learning},
pages = {9--44},
title = {{Learning to Predict by the Methods of Temporal Differences}},
volume = {3},
year = {1988}
}
@incollection{ZiNe13,
author = {Zimin, A and Neu, G},
booktitle = {Advances in Neural Information Processing Systems 26},
editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q},
pages = {1583--1591},
title = {{Online learning in episodic {M}arkovian decision processes by relative entropy policy search}},
year = {2013}
}
@article{Yun2014,
abstract = {In this paper, we consider networks consisting of a finite number of non-overlapping communities. To extract these communities, the interaction between pairs of nodes may be sampled from a large available data set, which allows a given node pair to be sampled several times. When a node pair is sampled, the observed outcome is a binary random variable, equal to 1 if nodes interact and to 0 otherwise. The outcome is more likely to be positive if nodes belong to the same communities. For a given budget of node pair samples or observations, we wish to jointly design a sampling strategy (the sequence of sampled node pairs) and a clustering algorithm that recover the hidden communities with the highest possible accuracy. We consider both non-adaptive and adaptive sampling strategies, and for both classes of strategies, we derive fundamental performance limits satisfied by any sampling and clustering algorithm. In particular, we provide necessary conditions for the existence of algorithms recovering the communities accurately as the network size grows large. We also devise simple algorithms that accurately reconstruct the communities when this is at all possible, hence proving that the proposed necessary conditions for accurate community detection are also sufficient. The classical problem of community detection in the stochastic block model can be seen as a particular instance of the problems consider here. But our framework covers more general scenarios where the sequence of sampled node pairs can be designed in an adaptive manner. The paper provides new results for the stochastic block model, and extends the analysis to the case of adaptive sampling.},
archivePrefix = {arXiv},
arxivId = {1402.3072},
author = {Yun, Se-Young and Proutiere, Alexandre},
eprint = {1402.3072},
month = {feb},
title = {{Community Detection via Random and Adaptive Sampling}},
url = {http://arxiv.org/abs/1402.3072},
year = {2014}
}
@article{grotschel1981ellipsoid,
abstract = {L. G. Khachiyan recently published a polynomial algorithm to check feasibility of a system of linear inequalities. The method is an adaptation of an algorithm proposed by Shor for non-linear optimization problems. In this paper we show that the method also yields interesting results in combinatorial optimization. Thus it yields polynomial algorithms for vertex packing in perfect graphs; for the matching and matroid intersection problems; for optimum covering of directed cuts of a digraph; for the minimum value of a submodular set function; and for other important combinatorial problems. On the negative side, it yields a proof that weighted fractional chromatic number is NP-hard.},
author = {Gr{\"{o}}tschel, M and Lov{\'{a}}sz, L and Schrijver, A},
doi = {10.1007/BF02579273},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Gr{\"{o}}tschel, Lov{\'{a}}sz, Schrijver - 1981 - The ellipsoid method and its consequences in combinatorial optimization.pdf:pdf},
issn = {02099683},
journal = {Combinatorica},
number = {2},
pages = {169--197},
publisher = {Springer},
title = {{The ellipsoid method and its consequences in combinatorial optimization}},
url = {http://www.springerlink.com/index/10.1007/BF02579273},
volume = {1},
year = {1981}
}
@article{goldberg2009multi-manifold,
author = {Goldberg, Andrew B and Zhu, Xiaojin and Singh, Aarti and Xu, Zhiting and Nowak, Robert},
journal = {Journal of Machine Learning Research},
pages = {169--176},
title = {{Multi-Manifold Semi-Supervised Learning}},
volume = {5},
year = {2009}
}
@book{shawe2004kernel,
author = {Shawe-Taylor, John and Cristianini, Nello},
publisher = {Cambridge University Press},
title = {{Kernel methods for pattern analysis}},
year = {2004}
}
@article{barto1983neuronlike,
author = {Barto, Andrew and Sutton, Richard and Anderson, Charles},
journal = {IEEE Transactions on Systems, Man, and Cybernetics},
number = {5},
pages = {835--846},
title = {{Neuronlike Elements that Can Solve Difficult Learning Control Problems}},
volume = {13},
year = {1983}
}
@article{cornuejols1977uncapacitated,
author = {Cornuejols, G and Fisher, M and Nemhauser, G L},
journal = {Annals of Discrete Mathematics},
pages = {163--177},
publisher = {Elsevier},
title = {{On the Uncapacitated Location Problem}},
volume = {1},
year = {1977}
}
@inproceedings{kujala07perturbed,
author = {Kujala, Jussi and Elomaa, Tapio},
booktitle = {Algorithmic Learning Theory},
doi = {10.1007/978-3-540-75225-7_16},
pages = {166--180},
title = {{Following the Perturbed Leader to Gamble at Multi-armed Bandits}},
year = {2007}
}
@article{hildebrand2014canonical,
abstract = {On the interior of a regular convex cone K in n-dimensional real space there exist two canonical Hessian metrics, the one generated by the logarithm of the characteristic function, and the Cheng-Yau metric. The former is associated with a self-concordant logarithmically homogeneous barrier on K, the universal barrier. It is invariant with respect to the unimodular automorphism subgroup of K and is compatible with the operation of taking product cones, but in general it does not behave well under duality. Here we introduce a barrier associated with the Cheng-Yau metric, the canonical barrier. It shares with the universal barrier the invariance, existence, and uniqueness properties and is compatible with the operation of taking product cones, but in addition is well behaved under duality. The canonical barrier can be characterized as the convex solution of the partial differential equation log det F? = 2F that tends to infinity as the argument tends to the boundary of K. Its barrier parameter does not exceed the dimension n of the cone. On homogeneous cones both barriers essentially coincide.},
author = {Hildebrand, Roland},
journal = {Mathematics of Operations Research},
number = {3},
pages = {841--850},
title = {{Canonical barriers on convex cones}},
volume = {39},
year = {2014}
}
@phdthesis{kveton2006planning,
author = {Kveton, Branislav},
school = {University of Pittsburgh},
title = {{Planning in Hybrid Structured Stochastic Domains}},
year = {2006}
}
@inproceedings{klein2012structured,
abstract = {TBD},
address = {Edinburgh (UK)},
author = {Klein, Edouard and PIOT, Bilal and Geist, Matthieu and Pietquin, Olivier},
booktitle = {European Workshop on Reinforcement Learning (EWRL 2012)},
month = {jun},
title = {{Structured Classification for Inverse Reinforcement Learning}},
url = {http://ewrl.files.wordpress.com/2011/12/ewrl2012{\_}submission{\_}30.pdf},
year = {2012}
}
@phdthesis{gordon1999approximate,
author = {Gordon, Geoffrey},
school = {Carnegie Mellon University},
title = {{Approximate Solutions to {Markov} Decision Processes}},
year = {1999}
}
@inproceedings{crites1996improving,
author = {Crites, Robert and Barto, Andrew},
booktitle = {Advances in Neural Information Processing Systems 8},
pages = {1017--1023},
title = {{Improving Elevator Performance Using Reinforcement Learning}},
year = {1996}
}
@article{Chen1998,
author = {Chen, S S and Donoho, D L and Saunders, M A},
journal = {SIAM Journal on Scientific Computing},
number = {1},
pages = {33--61},
title = {{Atomic Decomposition by Basis Pursuit}},
volume = {20},
year = {1998}
}
@inproceedings{price-bayesian,
author = {Price, Bob and Boutilier, Craig},
booktitle = {Proceedings of the 18th International Joint Conference on Artificial Intelligence},
pages = {712--720},
title = {{A {B}ayesian Approach to Imitation in Reinforcement Learning}},
year = {2003}
}
@inproceedings{valko2014spectral,
abstract = {Smooth functions on graphs have wide applications in manifold and semi-supervised learning. In this paper, we study a bandit problem where the payoffs of arms are smooth on a graph. This framework is suitable for solving online learning problems that involve graphs, such as content-based recommendation. In this problem, each item we can recommend is a node and its expected rating is similar to its neighbors. The goal is to recommend items that have high expected ratings. We aim for the algorithms where the cumulative regret with respect to the optimal policy would not scale poorly with the number of nodes. In particular, we introduce the notion of an effective dimension, which is small in real-world graphs, and propose two algorithms for solving our problem that scale linearly and sublinearly in this dimension. Our experiments on real-world content recommendation problem show that a good estimator of user preferences for thousands of items can be learned from just tens of nodes evaluations.},
author = {Valko, Michal and Munos, R{\'{e}}mi and Kveton, Branislav and Koc{\'{a}}k, Tom{\'{a}}{\v{s}}},
booktitle = {International Conference on Machine Learning},
title = {{Spectral bandits for smooth graph functions}},
year = {2014}
}
@article{jaksch2010near,
address = {Cambridge, MA, USA},
author = {Jaksch, Thomas and Ortner, Ronald and Auer, Peter},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = {aug},
pages = {1563--1600},
publisher = {MIT Press},
title = {{Near-optimal Regret Bounds for Reinforcement Learning}},
volume = {99},
year = {2010}
}
@incollection{visweswaran2005instance-specific,
address = {Cambridge, MA},
annote = {comps{\_}models},
author = {Visweswaran, Shyam and Cooper, Gregory F},
booktitle = {Advances in Neural Information Processing Systems 17},
editor = {Saul, Lawrence K and Weiss, Yair and Bottou, L{\'{e}}on},
pages = {1449--1456},
publisher = {MIT Press},
title = {{Instance-Specific {B}ayesian Model Averaging for Classification}},
url = {http://books.nips.cc/papers/files/nips17/NIPS2004{\_}0482.pdf},
year = {2005}
}
@inproceedings{Abernethy2008,
author = {Abernethy, Jacob and Hazan, Elad and Rakhlin, Alexander},
booktitle = {Proceedings of the 21st Annual Conference on Learning Theory},
title = {{Competing in the Dark: An Efficient Algorithm for Bandit Linear Optimization}},
year = {2008}
}
@article{carpentier2013adaptive,
abstract = {We consider the problem of estimating the tail index $\backslash$alpha of a distribution satisfying a ($\backslash$alpha, $\backslash$beta) second-order Pareto-type condition, where $\backslash$beta is the second-order coefficient. When $\backslash$beta is available, it was previously proved that $\backslash$alpha can be estimated with the oracle rate n{\^{}}{\{}-$\backslash$beta/(2$\backslash$beta+1){\}}. On the contrary, when $\backslash$beta is not available, estimating $\backslash$alpha with the oracle rate is challenging; so additional assumptions that imply the estimability of $\backslash$beta are usually made. In this paper, we propose an adaptive estimator of $\backslash$alpha, and show that this estimator attains the rate (n/$\backslash$log$\backslash$log n){\^{}}{\{}-$\backslash$beta/(2$\backslash$beta+1){\}} without a priori knowledge of $\backslash$beta and any additional assumptions. Moreover, we prove that this ($\backslash$log$\backslash$log n){\^{}}{\{}$\backslash$beta/(2$\backslash$beta+1){\}} factor is unavoidable by obtaining the companion lower bound.},
author = {Carpentier, Alexandra and Kim, Arlene K. H.},
journal = {Statistica Sinica},
title = {{Adaptive and minimax optimal estimation of the tail coefficient}},
year = {2014}
}
@inproceedings{le2013fastfood,
abstract = {Despite their successes, what makes kernel methods difficult to use in many large scale problems is the fact that computing the de- cision function is typically expensive, espe- cially at prediction time. In this paper, we overcome this difficulty by proposing Fast- food, an approximation that accelerates such computation significantly. Key to Fastfood is the observation that Hadamard matri- ces when combined with diagonal Gaussian matrices exhibit properties similar to dense Gaussian random matrices. Yet unlike the latter, Hadamard and diagonal matrices are inexpensive to multiply and store. These two matrices can be used in lieu of Gaussian matrices in Random Kitchen Sinks (Rahimi {\&} Recht, 2007) and thereby speeding up the computation for a large range of ker- nel functions. Specifically, Fastfood requires O(n log d) time and O(n) storage to compute n non-linear basis functions in d dimensions, a significant improvement from O(nd) com- putation and storage, without sacrificing ac- curacy. We prove that the approximation is unbiased and has low variance. Extensive ex- periments show that we achieve similar accu- racy to full kernel expansions and Random Kitchen Sinks while being 100x faster and us- ing 1000x less memory. These improvements, especially in terms of memory usage, make kernel methods more practical for applica- tions that have large training sets and/or re- quire real-time prediction.},
author = {Le, Quoc and Sarl{\'{o}}s, Tam{\'{a}}s and Smola, Alex J},
booktitle = {International Conference on Machine Learning},
title = {{Fastfood --- Approximating kernel expansions in loglinear time}},
year = {2013}
}
@incollection{pelleg2005active,
address = {Cambridge, MA},
annote = {comps{\_}ano},
author = {Pelleg, Dan and Moore, Andrew W},
booktitle = {Advances in Neural Information Processing Systems 17},
editor = {Saul, Lawrence K and Weiss, Yair and Bottou, L{\'{e}}on},
pages = {1073--1080},
publisher = {MIT Press},
title = {{Active Learning for Anomaly and Rare-Category Detection}},
url = {http://books.nips.cc/papers/files/nips17/NIPS2004{\_}0438.pdf},
year = {2005}
}
@article{HaSe07,
author = {Hazan, E and Seshadhri, C},
journal = {Electronic Colloquium on Computational Complexity (ECCC)},
title = {{Adaptive algorithms for online decision problems}},
year = {2007}
}
@phdthesis{littman1996algorithms,
author = {Littman, Michael},
school = {Brown University},
title = {{Algorithms for Sequential Decision Making}},
year = {1996}
}
@phdthesis{valko2016bandits,
abstract = {We investigate the structural properties of certain sequential decision-making problems with limited feedback (bandits) in order to bring the known algorithmic solutions closer to a practical use. In the first part, we put a special emphasis on structures that can be represented as graphs on actions, in the second part we study the large action spaces that can be of exponential size in the number of base actions or even infinite. We show how to take advantage of structures over the actions and (provably) learn faster.},
author = {Valko, Michal},
school = {{\'{E}}cole normale sup{\'{e}}rieure de Cachan},
title = {{Bandits on graphs and structures}},
type = {habilitation},
year = {2016}
}
@inproceedings{littlestone1991on-line,
author = {Littlestone, Nick and Long, Philip and Warmuth, Manfred},
booktitle = {Proceedings of the 23rd Annual ACM Symposium on Theory of Computing},
pages = {465--475},
title = {{On-Line Learning of Linear Functions}},
year = {1991}
}
@inproceedings{koller2000policy,
author = {Koller, Daphne and Parr, Ronald},
booktitle = {Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence},
pages = {326--334},
title = {{Policy Iteration for Factored {MDPs}}},
year = {2000}
}
@inproceedings{samothrakis2013training,
author = {Samothrakis, Spyridon and Perez, Diego and Lucas, Simon},
booktitle = {NIPS Workshop on Causality},
month = jan,
title = {{Training gradient boosting machines using curve-fitting and information-theoretic features for causal direction detection}},
year = {2013}
}
@phdthesis{valko2005evolving,
abstract = {Real biological networks are able to make decisions. We will show
that this behavior can be observed even in some simple architectures
of biologically plausible neural models. The great interest of this
thesis is also to contribute to methods of statistical decision theory
by giving a lead how to evolve the neural networks to solve miscellaneous
decision tasks.},
author = {Valko, Michal},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = jun,
school = {Comenius University, Bratislava, Slovakia},
title = {{Evolving Neural Networks for Statistical Decision Theory}},
year = {2005}
}
@article{ghashami2016frequent,
abstract = {We describe a new algorithm called Frequent Directions for deterministic matrix sketching in the row-updates model. The algorithm is presented an arbitrary input matrix {\$}A \backslashin R{\^{}}{\{}n \backslashtimes d{\}}{\$} one row at a time. It performed {\$}O(d \backslashtimes \backslashell){\$} operations per row and maintains a sketch matrix {\$}B \backslashin R{\^{}}{\{}\backslashell \backslashtimes d{\}}{\$} such that for any {\$}k {\textless} \backslashell{\$} {\$}\backslash|A{\^{}}TA - B{\^{}}TB \backslash|{\_}2 \backslashleq \backslash|A - A{\_}k\backslash|{\_}F{\^{}}2 / (\backslashell-k){\$} and {\$}\backslash|A - \backslashpi{\_}{\{}B{\_}k{\}}(A)\backslash|{\_}F{\^{}}2 \backslashleq \backslashbig(1 + \backslashfrac{\{}k{\}}{\{}\backslashell-k{\}}\backslashbig) \backslash|A-A{\_}k\backslash|{\_}F{\^{}}2 {\$} . Here, {\$}A{\_}k{\$} stands for the minimizer of {\$}\backslash|A - A{\_}k\backslash|{\_}F{\$} over all rank {\$}k{\$} matrices (similarly {\$}B{\_}k{\$}) and {\$}\backslashpi{\_}{\{}B{\_}k{\}}(A){\$} is the rank {\$}k{\$} matrix resulting from projecting {\$}A{\$} on the row span of {\$}B{\_}k{\$}. We show both of these bounds are the best possible for the space allowed. The summary is mergeable, and hence trivially parallelizable. Moreover, Frequent Directions outperforms exemplar implementations of existing streaming algorithms in the space-error tradeoff.},
author = {Ghashami, Mina and Liberty, Edo and Phillips, Jeff M and Woodruff, David P.},
journal = {SIAM Journal on Computing},
pages = {1--28},
title = {{Frequent directions: Simple and deterministic matrix sketching}},
year = {2016}
}
@article{golovin2011adaptive,
author = {Golovin, Daniel and Krause, Andreas},
journal = {Journal of Artificial Intelligence Research (JAIR)},
pages = {427--486},
title = {{Adaptive Submodularity: Theory and Applications in Active Learning and Stochastic Optimization}},
volume = {42},
year = {2011}
}
@inproceedings{HKW10,
author = {Hazan, E and Kale, S and Warmuth, M},
booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)},
pages = {144--154},
title = {{Learning rotations with little regret}},
year = {2010}
}
@article{Nem79,
annote = {(In Russian)},
author = {Nemirovski, A},
journal = {Ekonomika i Matematicheskie Metody},
title = {{Efficient methods for large-scale convex optimization problems}},
volume = {15},
year = {1979}
}
@inproceedings{Kalai03efficient,
address = {New York, NY, USA},
author = {Kalai, A and Vempala, S},
booktitle = {Proceedings of the 16th Annual Conference on Learning Theory and the 7th Kernel Workshop, COLT-Kernel 2003},
editor = {Sch{\"{o}}lkopf, B and Warmuth, M},
pages = {26--40},
publisher = {Springer},
title = {{Efficient algorithms for the online decision problem}},
year = {2003}
}
@book{borwein2006caa,
author = {Borwein, J M and Lewis, A S},
publisher = {Springer},
title = {{Convex Analysis and Nonlinear Optimization: Theory and Examples}},
year = {2006}
}
@article{even-dar09OnlineMDP,
address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
author = {Even-Dar, Eyal and Kakade, Sham M. and Mansour, Yishay},
doi = {10.1287/moor.1090.0396},
issn = {0364-765X},
journal = {Mathematics of Operations Research},
number = {3},
pages = {726--736},
publisher = {INFORMS},
title = {{Online {Markov} Decision Processes}},
volume = {34},
year = {2009}
}
@inproceedings{black92,
author = {Black, Ezra},
booktitle = {{LINGUIST} List 3.587},
url = {http://www.linguistlist.org/issues/3/3-587.html},
title = {{Meeting of interest group on evaluation of broad-coverage parsers of English}},
year = {1992}
}
@article{zhou2004learning,
author = {Zhou, D and Bousquet, O and Lal, T N and Weston, J and Sch{\"{o}}lkopf, B},
journal = {Advances in Neural Information Processing Systems},
keywords = {manifold{\_}learning},
pages = {321--328},
title = {{Learning with local and global consistency}},
volume = {16},
year = {2004}
}
@article{kumar2012sampling,
author = {Kumar, Sanjiv and Mohri, Mehryar and Talwalkar, Ameet},
journal = {Journal of Machine Learning Research},
number = {1},
pages = {981--1006},
title = {{Sampling Methods for the Nystr{\"{o}}m Method}},
volume = {13},
year = {2012}
}
@techreport{vaswani2016adaptive,
abstract = {Most previous work on influence maximization in social networks is limited to the non-adaptive setting in which the marketer is supposed to select all of the seed users, to give free samples or discounts to, up front. A disadvantage of this setting is that the marketer is forced to select all the seeds based solely on a diffusion model. If some of the selected seeds do not perform well, there is no opportunity to course-correct. A more practical setting is the adaptive setting in which the marketer initially selects a batch of users and observes how well seeding those users leads to a diffusion of product adoptions. Based on this market feedback, she formulates a policy for choosing the remaining seeds. In this paper, we study adaptive offline strategies for two problems: (a) MAXSPREAD -- given a budget on number of seeds and a time horizon, maximize the spread of influence and (b) MINTSS -- given a time horizon and an expected number of target users to be influenced, minimize the number of seeds that will be required. In particular, we present theoretical bounds and empirical results for an adaptive strategy and quantify its practical benefit over the non-adaptive strategy. We evaluate adaptive and non-adaptive policies on three real data sets. We conclude that while benefit of going adaptive for the MAXSPREAD problem is modest, adaptive policies lead to significant savings for the MINTSS problem.},
archivePrefix = {arXiv},
arxivId = {1604.08171},
author = {Vaswani, Sharan and Lakshmanan, Laks V. S.},
eprint = {1604.08171},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Vaswani, Lakshmanan - 2016 - Adaptive influence maximization in social networks Why commit when you can adapt.pdf:pdf},
title = {{Adaptive influence maximization in social networks: Why commit when you can adapt?}},
year = {2016}
}
@article{Rapaport2008,
author = {Rapaport, F and Barillot, E and Vert, J.-P.},
journal = {Bioinformatics},
month = jul,
number = {13},
pages = {i375--i382},
title = {{Classification of array{CGH} data using fused {SVM}}},
volume = {24},
year = {2008}
}
@article{Martinet1970,
author = {Martinet, B},
journal = {ESAIM: Mathematical Modelling and Numerical Analysis - Mod{\'{e}}lisation Math{\'{e}}matique et Analyse Num{\'{e}}rique},
number = {R3},
pages = {154--158},
publisher = {EDP Sciences},
title = {{R{\'{e}}gularisation d'in{\'{e}}quations variationnelles par approximations successives}},
volume = {4},
year = {1970}
}
@article{meila2001random,
abstract = {We present a new view of clustering and segmentation by pairwise similarities. We interpret the similarities as edge flows in a Markov random walk and study the eigenvalues and eigenvectors of the walk's transition matrix. This view shows that spectral methods for clustering and segmentation have a probabilistic foundation. We prove that the Normalized Cut method arises naturally from our framework and we provide a complete characterization of the cases when the Normalized Cut algorithm is exact. Then we discuss other spectral segmentation and clustering methods showing that several of them are essentially the same as NCut.},
author = {Meila, Marina and Shi, Jianbo},
journal = {International Conference on Artificial Intelligence and Statistics},
title = {{A random walks view of spectral segmentation}},
year = {2001}
}
@book{bellman1957dynamic,
address = {Princeton, NJ},
author = {Bellman, Richard},
publisher = {Princeton University Press},
title = {{Dynamic Programming}},
year = {1957}
}
@article{CT06,
author = {Cand{\`{e}}s, E J and Tao, T},
journal = {IEEE Transactions on Information Theory},
number = {12},
pages = {5406--5425},
title = {{Near-optimal signal recovery from random projections: universal encoding strategies?}},
volume = {52},
year = {2006}
}
@inproceedings{guestrin2001max-norm,
author = {Guestrin, Carlos and Koller, Daphne and Parr, Ronald},
booktitle = {Proceedings of the 17th International Joint Conference on Artificial Intelligence},
pages = {673--682},
title = {{Max-Norm Projections for Factored {MDPs}}},
year = {2001}
}
@article{ziebart2012probabilistic,
abstract = {Numerous interaction techniques have been developed that make "virtual" pointing at targets in graphical user interfaces easier than analogous physical pointing tasks by invoking target-based interface modifications. These pointing facilitation techniques crucially depend on methods for estimating the relevance of potential targets. Unfortunately, many of the simple methods employed to date are inaccurate in common settings with many selectable targets in close proximity. In this paper, we bring recent advances in statistical machine learning to bear on this underlying target relevance estimation problem. By framing past target-driven pointing trajectories as approximate solutions to well-studied control problems, we learn the probabilistic dynamics of pointing trajectories that enable more accurate predictions of intended targets.},
author = {Ziebart, Brian D and Dey, Anind K and Bagnell, J Andrew},
doi = {10.1145/2166966.2166968},
isbn = {9781450310482},
journal = {Proceedings of the 2012 ACM international conference on Intelligent User Interfaces IUI 12},
pages = {1},
publisher = {ACM Press},
series = {IUI '12},
title = {{Probabilistic Pointing Target Prediction via Inverse Optimal Control}},
url = {http://dl.acm.org/citation.cfm?doid=2166966.2166968},
year = {2012}
}
@article{WK14,
author = {Warmuth, Manfred and Koolen, Wouter},
journal = {COLT 2014 open problem},
title = {{Shifting experts on easy data}},
year = {2014}
}
@book{howard1960dynamic,
address = {Cambridge, MA},
author = {Howard, Ronald},
publisher = {MIT Press},
title = {{Dynamic Programming and {Markov} Processes}},
year = {1960}
}
@article{chollet1997some,
author = {Chollet, J},
journal = {American Mathematical Monthly},
number = {7},
pages = {609--617},
publisher = {Mathematical Association of America},
title = {{Some inequalities for principal submatrices}},
volume = {104},
year = {1997}
}
@inproceedings{bartlett05exponentiated,
author = {Bartlett, Peter L and Collins, Michael and Taskar, Ben and McAllester, David},
booktitle = {Advances in Neural Information Processing Systems},
pages = {113--120},
title = {{Exponentiated Gradient Algorithms for Large-margin Structured Classification}},
year = {2005}
}
@article{ghavamzadeh2016bayesian,
abstract = {Policy gradient methods are reinforcement learning algorithms that adapt a parameterized policy by following a performance gradient estimate. Many conventional policy gradient methods use Monte-Carlo techniques to estimate this gradient. The policy is improved by adjusting the parameters in the direction of the gradient estimate. Since Monte-Carlo methods tend to have high variance, a large number of samples is required to attain accurate estimates, resulting in slow convergence. In this paper, we first propose a Bayesian framework for policy gradient, based on modeling the policy gradient as a Gaussian process. This reduces the number of samples needed to obtain accurate gradient estimates. Moreover, estimates of the natural gradient as well as a measure of the uncertainty in the gradient estimates, namely, the gradient covariance, are provided at little extra cost. Since the proposed Bayesian framework considers system trajectories as its basic observable unit, it does not require the dynamics within trajectories to be of any particular form, and thus, can be easily extended to partially observable problems. On the downside, it cannot take advantage of the Markov property when the system is Markovian. To address this issue, we proceed to supplement our Bayesian policy gradient framework with a new actor-critic learning model in which a Bayesian class of non- parametric critics, based on Gaussian process temporal difference learning, is used. Such critics model the action- value function as a Gaussian process, allowing Bayes' rule to be used in computing the posterior distribution over action-value functions, conditioned on the observed data. Appropriate choices of the policy parameterization and of the prior covariance (kernel) between action-values allow us to obtain closed-form expressions for the posterior distribution of the gradient of the expected return with respect to the policy parameters. 
We perform detailed experimental comparisons of the proposed Bayesian policy gradient and actor-critic algorithms with classic Monte-Carlo based policy gradient methods, as well as with each other, on a number of reinforcement learning problems.},
author = {Ghavamzadeh, Mohammad and Engel, Yaakov and Valko, Michal},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ghavamzadeh, Engel, Valko - 2016 - Bayesian policy gradient and actor-critic algorithms.pdf:pdf},
journal = {Journal of Machine Learning Research},
number = {66},
pages = {1--53},
title = {{Bayesian policy gradient and actor-critic algorithms}},
volume = {17},
year = {2016}
}
@inproceedings{herbster1995tracking,
author = {Herbster, Mark and Warmuth, Manfred},
booktitle = {Proceedings of the 12th International Conference on Machine Learning},
pages = {286--294},
title = {{Tracking the Best Expert}},
year = {1995}
}
@inproceedings{saha2011improved,
abstract = {The study of online convex optimization in the bandit setting was initiated by Klein- berg (2004) and Flaxman et al. (2005). Such a setting models a decision maker that has to make decisions in the face of adversari- ally chosen convex loss functions. Moreover, the only information the decision maker re- ceives are the losses. The identities of the loss functions themselves are not revealed. In this setting, we reduce the gap between the best known lower and upper bounds for the class of smooth convex functions, i.e. convex functions with a Lipschitz continuous gradi- ent. Building upon existing work on self- concordant regularizers and one-point gradi- ent estimation, we give the first algorithm whose expected regret is O(T2/3), ignoring constant and logarithmic factors.},
author = {Saha, Ankan and Tewari, Ambuj},
booktitle = {AISTATS},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Saha, Tewari - 2011 - Improved Regret Guarantees for Online Smooth Convex Optimization with Bandit Feedback.pdf:pdf},
title = {{Improved Regret Guarantees for Online Smooth Convex Optimization with Bandit Feedback}},
url = {http://jmlr.csail.mit.edu/proceedings/papers/v15/saha11a/saha11a.pdf},
volume = {15},
year = {2011}
}
@inproceedings{Flaounas2011,
author = {Flaounas, I and Ali, O and Turchi, M and Snowsill, T and Nicart, F and {De Bie}, T and Cristianini, N},
booktitle = {Proceedings of the 2011 ACM SIGMOD international conference on Management of data},
pages = {1275--1278},
publisher = {ACM},
title = {{NOAM: News Outlets Analysis and Monitoring System}},
year = {2011}
}
@book{boyd94lmi,
address = {Philadelphia, PA},
author = {Boyd, S and {El Ghaoui}, L and Feron, E and Balakrishnan, V},
isbn = {0-89871-334-X},
publisher = {SIAM},
series = {Studies in Applied Mathematics},
title = {{Linear Matrix Inequalities in System and Control Theory}},
volume = {15},
year = {1994}
}
@misc{seeger-submod,
url = {http://lapmal.epfl.ch/papers/subm{\_}lindesign.pdf},
author = {Seeger, M},
title = {{On the Submodularity of Linear Experimental Design}},
year = {2009}
}
@article{R76,
author = {Rockafellar, R Tyrrell},
journal = {SIAM Journal on Control and Optimization},
keywords = {algorithms,point,proximal},
number = {5},
pages = {877--898},
title = {{Monotone Operators and the Proximal Point Algorithm}},
volume = {14},
year = {1976}
}
@article{friedman2010note,
author = {Friedman, J and Hastie, T and Tibshirani, R},
journal = {preprint},
title = {{A note on the group lasso and a sparse group lasso}},
year = {2010}
}
@inproceedings{kaufmann2012thompson,
abstract = {The question of the optimality of Thompson Sampling for solving the stochastic multi-armed bandit problem had been open since 1933. In this paper we answer it positively for the case of Bernoulli rewards by providing the first finite-time analysis that matches the asymptotic rate given in the Lai and Robbins lower bound for the cumulative regret. The proof is accompanied by a numerical comparison with other optimal policies, experiments that have been lacking in the literature until now for the Bernoulli case.},
author = {Kaufmann, Emilie and Korda, Nathaniel and Munos, R{\'{e}}mi},
booktitle = {Algorithmic Learning Theory},
title = {{Thompson Sampling: An Asymptotically Optimal Finite Time Analysis}},
year = {2012}
}
@inproceedings{narasimhan2006submodular,
author = {Narasimhan, M and Bilmes, J},
booktitle = {Advances in Neural Information Processing Systems},
title = {{A submodular-supermodular procedure with applications to discriminative structure learning}},
volume = {19},
year = {2006}
}
@article{ratliff2006maximum,
abstract = {Imitation learning of sequential, goal-directed behavior by standard supervised techniques is often difficult. We frame learning such behaviors as a maximum margin structured prediction problem over a space of policies. In this approach, we learn mappings from features to cost so an optimal policy in an MDP with these cost mimics the expert's behavior. Further, we demonstrate a simple, provably efficient approach to structured maximum margin learning, based on the subgradient method, that leverages existing fast algorithms for inference. Although the technique is general, it is particularly relevant in problems where A and dynamic programming approaches make learning policies tractable in problems beyond the limitations of a QP formulation. We demonstrate our approach applied to route planning for outdoor mobile robots, where the behavior a designer wishes a planner to execute is often clear, while specifying cost functions that engender this behavior is a much more difficult task.},
author = {Ratliff, Nathan D and Bagnell, J Andrew and Zinkevich, Martin A},
doi = {10.1145/1143844.1143936},
isbn = {1595933832},
issn = {17458358},
journal = {Proceedings of the 23rd ICML},
number = {10},
pmid = {17914344},
publisher = {ACM Press},
title = {{Maximum margin planning}},
url = {http://webdocs.cs.ualberta.ca/{~}maz/publications/maximummarginplanning.pdf},
volume = {3},
year = {2006}
}
@article{lauritzen1988local,
author = {Lauritzen, Steffen and Spiegelhalter, David},
journal = {Journal of Royal Statistical Society},
pages = {157--224},
title = {{Local computations with probabilities on graphical structures and their application to expert systems}},
volume = {50},
year = {1988}
}
@article{Kar2011,
abstract = {This paper studies the multi-agent bandit problem in a distributed networked setting. The setting considered assumes only one bandit (the major bandit) has accessible reward information from its samples, whereas the rest (the minor bandits) have unobservable rewards. Under the assumption that the minor bandits are aware of the sampling pattern of the major bandit (but with no direct access to its rewards), a lower bound on the expected average network regret is obtained. The lower bound resembles the logarithmic optimal regret attained in single (classical) bandit problems, but in addition is shown to scale down with the number of agents. A collaborative and adaptive distributed allocation rule DA is proposed and is shown to achieve the lower bound on the expected average regret for a connected inter-bandit communication network. In particular, it is shown that under the DA allocation rule, the minor bandits attain sub-logarithmic expected regrets as opposed to logarithmic in the single agent setting.},
author = {Kar, Soummya and Poor, H. Vincent and Cui, Shuguang},
doi = {10.1109/CDC.2011.6160719},
isbn = {9781612848006},
issn = {01912216},
journal = {Proceedings of the IEEE Conference on Decision and Control},
keywords = {Asymptotically Efficient,Distributed Allocation Rules,Networked Bandit Problems,Partially Observable Rewards},
pages = {1771--1778},
title = {{Bandit problems in networks: Asymptotically efficient distributed allocation rules}},
year = {2011}
}
@inproceedings{kveton2006learning,
author = {Kveton, Branislav and Hauskrecht, Milos},
booktitle = {Proceedings of the 21st National Conference on Artificial Intelligence},
pages = {1161--1166},
title = {{Learning Basis Functions in Hybrid Domains}},
year = {2006}
}
@inproceedings{hanawal2015cheap,
abstract = {We consider stochastic sequential learning problems where the learner can observe the average reward of several actions. Such a setting is interesting in many applications involving monitoring and surveillance, where the set of the actions to observe represent some (geographical) area. The importance of this setting is that in these applications, it is actually cheaper to observe average reward of a group of actions rather than the reward of a single action. We show that when the reward is smooth over a given graph representing the neighboring actions, we can maximize the cumulative reward of learning while minimizing the sensing cost. In this paper we propose CheapUCB, an algorithm that matches the regret guarantees of the known algorithms for this setting and at the same time guarantees a linear cost again over them. As a by-product of our analysis, we establish a Omega($\backslash$sqrt(dT)) lower bound on the cumulative regret of spectral bandits for a class of graphs with effective dimension d.},
author = {Hanawal, Manjesh and Saligrama, Venkatesh and Valko, Michal and Munos, R{\'{e}}mi},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Hanawal et al. - 2015 - Cheap bandits.pdf:pdf},
title = {{Cheap bandits}},
year = {2015}
}
@misc{openmp2008openmp,
author = {OpenMP},
institution = {{OpenMP} Architecture Review Board},
title = {{{OpenMP} Application Program Interface -- Version 3.0}},
year = {2008}
}
@article{guez2012efficient,
abstract = {Bayesian model-based reinforcement learning is a formally elegant approach to learning optimal behaviour under model uncertainty, trading off exploration and exploitation in an ideal way. Unfortunately, finding the resulting Bayes-optimal policies is notoriously taxing, since the search space becomes enormous. In this paper we introduce a tractable, sample-based method for approximate Bayesoptimal planning which exploits Monte-Carlo tree search. Our approach outperformed prior Bayesian model-based RL algorithms by a significant margin on several well-known benchmark problems – because it avoids expensive applications of Bayes rule within the search tree by lazily sampling models from the current beliefs. We illustrate the advantages of our approach by showing it working in an infinite state space domain which is qualitatively out of reach of almost all previous work in Bayesian exploration.},
author = {Guez, Arthur and Silver, David and Dayan, Peter},
journal = {Neural Information Processing Systems},
title = {{Efficient Bayes-adaptive reinforcement learning using sample-based search}},
year = {2012}
}
@inproceedings{goldberger2004neighbourhood,
author = {Goldberger, Jacob and Roweis, Sam T and Hinton, Geoffrey E and Salakhutdinov, Ruslan},
booktitle = {NIPS},
title = {{Neighbourhood Components Analysis}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.108.7841},
year = {2004}
}
@inproceedings{piot2013learning,
abstract = {This paper provides a comparative study between Inverse Reinforcement Learning (IRL) and Apprenticeship Learning (AL). IRL and AL are two frameworks, using Markov Decision Processes (MDP), which are used for the imitation learning problem where an agent tries to learn from demonstrations of an expert. In the AL Framework, the agent tries to learn the expert policy whereas in the IRL Framework, the agent tries to learn a reward which can explain the behavior of the expert. This reward is then optimized to imitate the expert. One can wonder if it is worth estimating such a reward, or if estimating a Policy is sufficient. This quite natural question has not really been addressed in the literature right now. We provide partial answers, both from a theoretical and empirical point of view.},
address = {Prague (Czech Republic)},
author = {Piot, Bilal and Geist, Matthieu and Pietquin, Olivier},
booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML/PKDD 2013)},
doi = {10.1007/978-3-642-40988-2_2},
editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezny, Filip},
isbn = {978-3-642-40987-5},
month = sep,
pages = {17--32},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Learning from demonstrations: Is it worth estimating a reward function?}},
url = {http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/384.pdf},
volume = {8188},
year = {2013}
}
@inproceedings{collins02discriminative,
address = {Morristown, NJ, USA},
author = {Collins, Michael},
booktitle = {EMNLP '02: Proceedings of the ACL-02 Conference on Empirical Methods in Natural Language Processing},
doi = {10.3115/1118693.1118694},
pages = {1--8},
publisher = {Association for Computational Linguistics},
title = {{Discriminative training methods for hidden {Markov} models: theory and experiments with perceptron algorithms}},
year = {2002}
}
@article{karlin1994competitive,
author = {Karlin, Anna and Manasse, Mark and McGeoch, Lyle and Owicki, Susan},
journal = {Algorithmica},
number = {6},
pages = {542--571},
title = {{Competitive Randomized Algorithms for Nonuniform Problems}},
volume = {11},
year = {1994}
}
@inproceedings{Mairal2010a,
author = {Mairal, J and Jenatton, R and Obozinski, G and Bach, F},
booktitle = {Advances in Neural Information Processing Systems},
number = {00512556},
organization = {HAL INRIA},
title = {{Network Flow Algorithms for Structured Sparsity}},
year = {2010}
}
@inproceedings{Sanchez-2000-Mislabeled,
author = {Sanchez, J S and Barandela, R and Marques, A I and Alejo, R and Badenas, J},
booktitle = {Advances in Pattern Recognition Lecture Notes in Computer Science 1876},
pages = {621--630},
title = {{Decontamination of Training Data for Supervised Pattern Recognition}},
year = {2000}
}
@inproceedings{feng2004dynamic,
author = {Feng, Zhengzhu and Dearden, Richard and Meuleau, Nicolas and Washington, Richard},
booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence},
pages = {154--161},
title = {{Dynamic Programming for Structured Continuous {Markov} Decision Problems}},
year = {2004}
}
@inproceedings{Jiang-2004-Mislabeled,
author = {Jiang, Y and Zhou, Z.-H.},
booktitle = {Lecture Notes in Computer Science 3173},
pages = {356--361},
title = {{Editing Training Data for {kNN} Classifiers with Neural Network Ensemble}},
year = {2004}
}
@article{fill1998interruptible,
abstract = {For a large class of examples arising in statistical physics known as attractive spin systems (e.g., the Ising model), one seeks to sample from a probability distribution $\pi$ on an enormously large state space, but elementary sampling is ruled out by the infeasibility of calculating an appropriate normalizing constant. The same difficulty arises in computer science problems where one seeks to sample randomly from a large finite distributive lattice whose precise size cannot be ascertained in any reasonable amount of time. The Markov chain Monte Carlo (MCMC) approximate sampling approach to such a problem is to construct and run "for a long time" a Markov chain with long-run distribution $\pi$. But determining how long is long enough to get a good approximation can be both analytically and empirically difficult. Recently, Propp and Wilson have devised an ingenious and efficient algorithm to use the same Markov chains to produce perfect (i.e., exact) samples from $\pi$. However, the running time of their algorithm is an unbounded random variable whose order of magnitude is typically unknown a priori and which is not independent of the state sampled, so a naive user with limited patience who aborts a long run of the algorithm will introduce bias. We present a new algorithm which (1) again uses the same Markov chains to produce perfect samples from $\pi$, but is based on a different idea (namely, acceptance/rejection sampling); and (2) eliminates user-impatience bias. Like the Propp-Wilson algorithm, the new algorithm applies to a general class of suitably monotone chains, and also (with modification) to "anti-monotone" chains. When the chain is reversible, naive implementation of the algorithm uses fewer transitions but more space than Propp-Wilson. 
When fine-tuned and applied with the aid of a typical pseudorandom number generator to an attractive spin system on n sites using a random site updating Gibbs sampler whose mixing time $\tau$ is polynomial in n, the algorithm runs in time of the same order (bound) as Propp-Wilson [expectation O($\tau$ log n)] and uses only logarithmically more space [expectation O(n log n), vs. O(n) for Propp-Wilson].},
author = {Fill, James Allen},
journal = {Annals of Applied Probability},
keywords = {Attractive spin system,Duality,Gibbs sampler,Ising model,Markov chain Monte Carlo,Monotone chain,Partially ordered set,Perfect simulation,Rejection sampling,Separation,Strong stationary time},
number = {1},
pages = {131--162},
title = {{An interruptible algorithm for perfect sampling via Markov chains}},
volume = {8},
year = {1998}
}
@inproceedings{charikar1997incremental,
author = {Charikar, Moses and Chekuri, Chandra and Feder, Tomas and Motwani, Rajeev},
booktitle = {Proceedings of the 29th Annual ACM Symposium on Theory of Computing},
pages = {626--635},
title = {{Incremental Clustering and Dynamic Information Retrieval}},
year = {1997}
}
@article{fujishige2006minimum,
author = {Fujishige, Satoru and Isotani, S},
journal = {Pacific Journal of Optimization},
pages = {3--17},
title = {{A Submodular Function Minimization Algorithm Based on the Minimum-Norm Base}},
volume = {7},
year = {2011}
}
@inproceedings{valko2013finite,
abstract = {We tackle the problem of online reward maximisation over a large finite set of actions described by their contexts. We focus on the case when the number of actions is too big to sample all of them even once. However we assume that we have access to the similarities between actions' contexts and that the expected reward is an arbitrary linear function of the contexts' images in the related reproducing kernel Hilbert space (RKHS). We propose KernelUCB, a kernelised UCB algorithm, and give a cumulative regret bound through a frequentist analysis. For contextual bandits, the related algorithm GP-UCB turns out to be a special case of our algorithm, and our finite-time analysis improves the regret bound of GP-UCB for the agnostic case, both in the terms of the kernel-dependent quantity and the RKHS norm of the reward function. Moreover, for the linear kernel, our regret bound matches the lower bound for contextual linear bandits.},
author = {Valko, Michal and Korda, Nathan and Munos, R{\'{e}}mi and Flaounas, Ilias and Cristianini, Nelo},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Finite-time analysis of kernelised contextual bandits}},
year = {2013}
}
@techreport{submodular_tutorial,
author = {Bach, F},
institution = {HAL},
number = {00527714},
title = {{Convex Analysis and Optimization with Submodular Functions: a Tutorial}},
year = {2010}
}
@inproceedings{guestrin2004solving,
author = {Guestrin, Carlos and Hauskrecht, Milos and Kveton, Branislav},
booktitle = {Proceedings of the 20th Conference on Uncertainty in Artificial Intelligence},
pages = {235--242},
title = {{Solving Factored {\{}MDPs{\}} with Continuous and Discrete Variables}},
year = {2004}
}
@inproceedings{mnih2008,
author = {Mnih, V. and Szepesv{\'{a}}ri, Csaba and Audibert, J.-Y.},
booktitle = {ICML},
pages = {672--679},
title = {{Empirical {Bernstein} stopping}},
year = {2008}
}
@inproceedings{schuurmans2002direct,
author = {Schuurmans, Dale and Patrascu, Relu},
booktitle = {Advances in Neural Information Processing Systems 14},
pages = {1579--1586},
title = {{Direct Value-Approximation for Factored {MDPs}}},
year = {2002}
}
@inproceedings{jamieson2014lilUCB,
author = {Jamieson, Kevin and Malloy, Matthew and Nowak, Robert and Bubeck, S{\'{e}}bastien},
booktitle = {Conference on Learning Theory},
title = {{lil'UCB: An Optimal Exploration Algorithm for Multi-Armed Bandits}},
year = {2014}
}
@inproceedings{el-yaniv_stable_2006,
author = {El-Yaniv, Ran and Pechyony, Dmitry},
booktitle = {Proceedings of COLT},
title = {{Stable transductive learning}},
year = {2006}
}
@inproceedings{zhu2005harmonic,
address = {New York, NY, USA},
author = {Zhu, Xiaojin and Lafferty, John},
booktitle = {Proceedings of the 22nd international conference on Machine learning},
doi = {http://doi.acm.org/10.1145/1102351.1102484},
isbn = {1-59593-180-5},
pages = {1052--1059},
publisher = {ACM},
series = {ICML '05},
title = {{Harmonic mixtures: combining mixture models and graph-based methods for inductive and scalable semi-supervised learning}},
url = {http://doi.acm.org/10.1145/1102351.1102484},
year = {2005}
}
@inproceedings{joachims1999transductive,
address = {San Francisco, CA, USA},
author = {Joachims, Thorsten},
booktitle = {ICML '99: Proceedings of the Sixteenth International Conference on Machine Learning},
isbn = {1-55860-612-2},
pages = {200--209},
title = {{Transductive Inference for Text Classification using Support Vector Machines}},
year = {1999}
}
@article{browne2012survey,
author = {Browne, Cameron B. and Powley, Edward and Whitehouse, Daniel and Lucas, Simon M. and Cowling, Peter I. and Rohlfshagen, Philipp and Tavener, Stephen and Perez, Diego and Samothrakis, Spyridon and Colton, Simon},
journal = {IEEE Transactions on Computational Intelligence and AI in Games},
number = {1},
pages = {1--43},
title = {{A survey of Monte Carlo tree search methods}},
volume = {4},
year = {2012}
}
@misc{urlhttp://mplab.ucsd.edumplab,
url = {http://mplab.ucsd.edu},
title = {{MPLab GENKI Database}}
}
@inproceedings{syed2008apprenticeship,
author = {Syed, Umar and Schapire, Robert and Bowling, Michael},
booktitle = {Proceedings of the Twenty-Fifth International Conference on Machine Learning (ICML)},
pages = {1032--1039},
title = {{Apprenticeship Learning Using Linear Programming}},
url = {http://www.cs.princeton.edu/{~}usyed/SyedBowlingSchapireICML2008.pdf},
year = {2008}
}
@inproceedings{AYSze11,
author = {Abbasi-Yadkori, Yasin and Szepesv{\'{a}}ri, Csaba},
title = {{Regret Bounds for the Adaptive Control of Linear Quadratic Systems}},
url = {http://webdocs.cs.ualberta.ca/{~}abbasiya/LQR.pdf}
}
@inproceedings{titov07incremental,
address = {Prague, Czech Republic},
author = {Titov, Ivan and Henderson, James},
booktitle = {ACL '07: Proceedings of the 45th Annual Meeting of the Association for Computational Linguistics},
pages = {632--639},
publisher = {Association for Computational Linguistics},
title = {{Constituent Parsing with Incremental Sigmoid Belief Networks}},
url = {http://www.aclweb.org/anthology/P/P07/P07-0080},
year = {2007}
}
@article{queyranne1998minimizing,
author = {Queyranne, M},
journal = {Mathematical Programming},
number = {1},
pages = {3--12},
publisher = {Springer},
title = {{Minimizing symmetric submodular functions}},
volume = {82},
year = {1998}
}
@inproceedings{atkeson,
author = {Atkeson, C G and Schaal, S},
pages = {12--20},
title = {{Robot learning from demonstration}}
}
@inproceedings{chau2011apolo,
author = {Chau, Duen Horng and Kittur, Aniket and Hong, Jason I. and Faloutsos, Christos},
booktitle = {Conference on Human Factors in Computing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Chau et al. - 2011 - Apolo making sense of large network data by combining rich user interaction and machine learning.pdf:pdf},
title = {{Apolo: Making sense of large network data by combining rich user interaction and machine learning}},
year = {2011}
}
@article{agrawal1995continuum,
author = {Agrawal, R},
journal = {SIAM Journal on Control and Optimization},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1926--1951},
title = {{The continuum-armed bandit problem}},
volume = {33},
year = {1995}
}
@techreport{zhu2008semi-supervised,
author = {Zhu, Xiaojin},
institution = {University of Wisconsin-Madison},
number = {1530},
title = {{Semi-supervised learning literature survey}},
year = {2008}
}
@article{gyorfi12empirical,
author = {Gy{\"{o}}rfi, L{\'{a}}szl{\'{o}} and Walk, Harro},
journal = {IEEE Transactions on Information Theory},
number = {10},
pages = {6320--6331},
title = {{Empirical Portfolio Selection Strategies With Proportional Transaction Costs}},
volume = {58},
year = {2012}
}
@techreport{cohen_online_2016,
abstract = {Finding a small spectral approximation for a tall {\$}n \backslashtimes d{\$} matrix {\$}A{\$} is a fundamental numerical primitive. For a number of reasons, one often seeks an approximation whose rows are sampled from those of {\$}A{\$}. Row sampling improves interpretability, saves space when {\$}A{\$} is sparse, and preserves row structure, which is especially important, for example, when {\$}A{\$} represents a graph. However, correctly sampling rows from {\$}A{\$} can be costly when the matrix is large and cannot be stored and processed in memory. Hence, a number of recent publications focus on row sampling in the streaming setting, using little more space than what is required to store the outputted approximation [KL13, KLM+14]. Inspired by a growing body of work on online algorithms for machine learning and data analysis, we extend this work to a more restrictive online setting: we read rows of {\$}A{\$} one by one and immediately decide whether each row should be kept in the spectral approximation or discarded, without ever retracting these decisions. We present an extremely simple algorithm that approximates {\$}A{\$} up to multiplicative error {\$}\backslashepsilon{\$} and additive error {\$}\backslashdelta{\$} using {\$}O(d \backslashlog d \backslashlog(\backslashepsilon||A||{\_}2/\backslashdelta)/\backslashepsilon{\^{}}2){\$} online samples, with memory overhead proportional to the cost of storing the spectral approximation. We also present an algorithm that uses {\$}O(d{\^{}}2{\$}) memory but only requires {\$}O(d\backslashlog(\backslashepsilon||A||{\_}2/\backslashdelta)/\backslashepsilon{\^{}}2){\$} samples, which we show is optimal. Our methods are clean and intuitive, allow for lower memory usage than prior work, and expose new theoretical properties of leverage score based matrix approximation.},
annote = {arXiv: 1604.05448},
archivePrefix = {arXiv},
arxivId = {1604.05448},
author = {Cohen, Michael B and Musco, Cameron and Pachocki, Jakub},
eprint = {1604.05448},
title = {{Online row sampling}},
url = {http://arxiv.org/abs/1604.05448},
year = {2016}
}
@inproceedings{zhang1996high-performance,
author = {Zhang, Wei and Dietterich, Thomas},
booktitle = {Advances in Neural Information Processing Systems 8},
pages = {1024--1030},
title = {{High-Performance Job-Shop Scheduling with a Time-Delay {TD}($\lambda$) Network}},
year = {1996}
}
@book{nara,
annote = {Second edition},
author = {Narayanan, H},
publisher = {North-Holland},
title = {{Submodular Functions and Electrical Networks}},
year = {2009}
}
@book{guestrin2008beyond,
address = {Helsinki, Finland},
author = {Guestrin, Carlos and Krause, Andreas},
publisher = {Tutorial at the 25th International Conference on Machine Learning (ICML)},
title = {{Beyond convexity - submodularity in machine learning}},
year = {2008}
}
@inproceedings{bresina2002planning,
author = {Bresina, John and Dearden, Richard and Meuleau, Nicolas and Ramakrishnan, Sailesh and Smith, David and Washington, Rich},
booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial Intelligence},
pages = {77--84},
title = {{Planning Under Continuous Time and Resource Uncertainty: A Challenge for {AI}}},
year = {2002}
}
@article{even2006action,
author = {Even-Dar, Eyal and Mannor, Shie and Mansour, Yishay},
journal = {Journal of Machine Learning Research},
pages = {1079--1105},
publisher = {JMLR. org},
title = {{Action elimination and stopping conditions for the multi-armed bandit and reinforcement learning problems}},
volume = {7},
year = {2006}
}
@inproceedings{finkel08crfpcfg,
author = {Finkel, Jenny R and Kleeman, Alex and Manning, Christopher D},
booktitle = {ACL '08: Proceedings of the 46th Annual Meeting of the Association for Computational Linguistics},
pages = {959--967},
publisher = {Association for Computational Linguistics},
title = {{Efficient, Feature-based, Conditional Random Field Parsing}},
year = {2008}
}
@article{ghahramani1997factorial,
address = {Hingham, MA, USA},
annote = {comps{\_}models},
author = {Ghahramani, Zoubin and Jordan, Michael I},
issn = {0885-6125},
journal = {Mach. Learn.},
number = {2-3},
pages = {245--273},
publisher = {Kluwer Academic Publishers},
title = {{Factorial Hidden Markov Models}},
url = {http://www.springerlink.com/content/w3523227075k34t4/},
volume = {29},
year = {1997}
}
@inproceedings{silver2010monte-carlo,
abstract = {This paper introduces a Monte-Carlo algorithm for online planning in large POMDPs. The algorithm combines a Monte-Carlo update of the agent's belief state with a Monte-Carlo tree search from the current belief state. The new algorithm, POMCP, has two important properties. First, Monte-Carlo sampling is used to break the curse of dimensionality both during belief state updates and during planning. Second, only a black box simulator of the POMDP is required, rather than explicit probability distributions. These properties enable POMCP to plan effectively in significantly larger POMDPs than has previously been possible. We demonstrate its effectiveness in three large POMDPs. We scale up a well-known benchmark problem, rocksample, by several orders of magnitude. We also introduce two challenging new POMDPs: 10 X 10 battleship and partially observable PacMan, with approximately 10{\^{}}18 and 10{\^{}}56 states respectively. Our Monte-Carlo planning algorithm achieved a high level of performance with no prior knowledge, and was also able to exploit simple domain knowledge to achieve better results with less search. POMCP is the first general purpose planner to achieve high performance in such large and unfactored POMDPs.},
author = {Silver, David and Veness, Joel},
booktitle = {Neural Information Processing Systems},
title = {{Monte-Carlo planning in large POMDPs}},
year = {2010}
}
@article{HK12,
author = {Hazan, Elad and Kale, Satyen},
journal = {Journal of Machine Learning Research},
title = {{Online Submodular Minimization}},
volume = {13},
year = {2012}
}
@inproceedings{dechter1996bucket,
author = {Dechter, Rina},
booktitle = {Proceedings of the 12th Conference on Uncertainty in Artificial Intelligence},
pages = {211--219},
title = {{Bucket Elimination: A Unifying Framework for Probabilistic Inference}},
year = {1996}
}
@article{kempe2003maximizing,
abstract = {Models for the processes by which ideas and influence propagate through a social network have been studied in a number of domains, including the diffusion of medical and technological innovations, the sudden and widespread adoption of various strategies in game-theoretic settings, and the effects of “word of mouth” in the promotion of new products. Recently, motivated by the design of viral marketing strategies, Domingos and Richardson posed a fundamental algorithmic problem for such social network processes: if we can try to convince a subset of individuals to adopt a new product or innovation, and the goal is to trigger a large cascade of further adoptions, which set of individuals should we target? We consider this problem in several of the most widely studied models in social network analysis. The optimization problem of selecting the most influential nodes is NP-hard here, and we provide the first provable approximation guarantees for efficient algorithms. Using an analysis framework based on submodular functions, we show that a natural greedy strategy obtains a solution that is provably within 63{\%} of optimal for several classes of models; our framework suggests a general approach for reasoning about the performance guarantees of algorithms for these types of influence problems in social networks. We also provide computational experiments on large collaboration networks, showing that in addition to their provable guarantees, our approximation algorithms significantly out-perform nodeselection heuristics based on the well-studied notions of degree centrality and distance centrality from the field of social networks.},
author = {Kempe, David and Kleinberg, Jon and Tardos, {\'{E}}va},
journal = {Knowledge Discovery and Data Mining},
pages = {137},
title = {{Maximizing the spread of influence through a social network}},
year = {2003}
}
@article{schedlbauer2009what,
abstract = {Alerts and prompts represent promising types of decision support in
electronic prescribing to tackle inadequacies in prescribing. A systematic
review was conducted to evaluate the efficacy of computerized drug
alerts and prompts searching EMBASE, CINHAL, MEDLINE, and PsychINFO
up to May 2007. Studies assessing the impact of electronic alerts
and prompts on clinicians' prescribing behavior were selected and
categorized by decision support type. Most alerts and prompts (23
out of 27) demonstrated benefit in improving prescribing behavior
and/or reducing error rates. The impact appeared to vary based on
the type of decision support. Some of these alerts (n = 5) reported
a positive impact on clinical and health service management outcomes.
For many categories of reminders, the number of studies was very
small and few data were available from the outpatient setting. None
of the studies evaluated features that might make alerts and prompts
more effective. Details of an updated search run in Jan 2009 are
included in the supplement section of this review.},
author = {Schedlbauer, Angela and Prasad, Vibhore and Mulvaney, Caroline and Phansalkar, Shobha and Stanton, Wendy and Bates, David W and Avery, Anthony J},
doi = {10.1197/jamia.M2910},
institution = {Division of Primary Care, School of Community Health Sciences, Research and Learning Resources Division, Information Services, University of Nottingham, Nottingham, UK. angela.schedlbauer@nottingham.ac.uk},
journal = {J Am Med Inform Assoc},
keywords = {Clinical Competence; Decision Support Systems,Clinical; Drug Therapy,Computer-Assisted; Electronic Prescribing; Humans,prevention /{\&}/ control; Medication Systems; Remin},
number = {4},
pages = {531--538},
pmid = {19390110},
title = {{What evidence supports the use of computerized alerts and prompts to improve clinicians' prescribing behavior?}},
url = {http://dx.doi.org/10.1197/jamia.M2910},
volume = {16},
year = {2009}
}
@article{globerson2007visualizing,
annote = {comps{\_}distance},
author = {Globerson, A and Roweis, S},
journal = {The 11th International Workshop on Artificial Intelligence and Statistics Puerto-Rico},
title = {{Visualizing pairwise similarity via semidefinite programming}},
url = {http://people.csail.mit.edu/gamir/pubs/psde.pdf},
year = {2007}
}
@inproceedings{rahimi2007random,
abstract = {To accelerate the training of kernel machines, we propose to map the input data to a randomized low-dimensional feature space and then apply existing fast linear methods. Our randomized features are designed so that the inner products of the transformed data are approximately equal to those in the feature space of a user specified shift-invariant kernel. We explore two sets of random features, provide convergence bounds on their ability to approximate various radial basis kernels, and show that in large-scale classification and regression tasks linear machine learning algorithms that use these features outperform state-of-the-art large-scale kernel machines.},
author = {Rahimi, Ali and Recht, Ben},
booktitle = {Neural Information Processing Systems},
title = {{Random features for large-scale kernel machines}},
year = {2007}
}
@inproceedings{Varoquaux2010a,
author = {Varoquaux, G and Jenatton, R and Gramfort, A and Obozinski, G and Thirion, B and Bach, F},
booktitle = {NIPS Workshop on Practical Applications of Sparse Modeling: Open Issues and New Directions},
title = {{Sparse Structured Dictionary Learning for Brain Resting-State Activity Modeling}},
year = {2010}
}
@inproceedings{kuniyoshi94visionbased,
author = {Kuniyoshi, Y and Riekki, J and Ishii, M and Rougeaux, S and Kita, N and Sakane, S and Kakikura, M},
booktitle = {IEEE/RSJ IROS},
pages = {925--931},
title = {{Vision-based behaviors for multi-robot cooperation}},
url = {citeseer.ist.psu.edu/kuniyoshi94visionbased.html},
year = {1994}
}
@article{casella1996rao-blackwellisation,
author = {Casella, George and Robert, Christian},
journal = {Biometrika},
number = {1},
pages = {81--94},
title = {{{Rao-Blackwellisation} of Sampling Schemes}},
volume = {83},
year = {1996}
}
@book{boucheron13concentration,
author = {Boucheron, St{\'{e}}phane and Lugosi, G{\'{a}}bor and Massart, Pascal},
publisher = {Oxford University Press},
title = {{Concentration inequalities}},
year = {2013}
}
@article{Bubeck2009,
abstract = {We consider the framework of stochastic multi-armed bandit problems and study the possibilities and limitations of strategies that perform an online exploration of the arms. The strategies are assessed in terms of their simple regret, a regret notion that captures the fact that exploration is only constrained by the number of available rounds (not necessarily known in advance), in contrast to the case when the cumulative regret is considered and when exploitation needs to be performed at the same time. We believe that this performance criterion is suited to situations when the cost of pulling an arm is expressed in terms of resources rather than rewards. We discuss the links between the simple and the cumulative regret. The main result is that the required explorationexploitation trade-offs are qualitatively different, in view of a general lower bound on the simple regret in terms of the cumulative regret.},
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles},
journal = {Algorithmic Learning Theory},
keywords = {computational,information theoretic learning with statistics,theory {\&} algorithms},
pages = {23--37},
publisher = {Springer-Verlag},
title = {{Pure Exploration in Multi-armed Bandits Problems}},
url = {http://eprints.pascal-network.org/archive/00006108/},
year = {2009}
}
@inproceedings{dearden1999model,
author = {Dearden, Richard and Friedman, Nir and Andre, David},
booktitle = {Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence},
pages = {150--159},
title = {{Model Based {Bayesian} Exploration}},
year = {1999}
}
@inproceedings{yang2012simple,
abstract = {In this work, we develop a simple algorithm for semi-supervised regression. The key idea is to use the top eigenfunctions of integral operator derived from both labeled and unlabeled examples as the basis functions and learn the prediction function by a simple linear regression. We show that under appropriate assumptions about the integral operator, this approach is able to achieve an improved regression error bound better than existing bounds of supervised learning. We also verify the effectiveness of the proposed algorithm by an empirical study.},
archivePrefix = {arXiv},
arxivId = {1206.6412},
author = {Ji, Ming and Yang, Tianbao and Lin, Binbin and Jin, Rong and Han, Jiawei},
booktitle = {International Conference on Machine Learning},
eprint = {1206.6412},
month = {jun},
title = {{A Simple Algorithm for Semi-supervised Learning with Improved Generalization Error Bound}},
url = {http://arxiv.org/abs/1206.6412},
year = {2012}
}
@inproceedings{BM11,
author = {Bach, F and Moulines, E},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
title = {{Non-Asymptotic Analysis of Stochastic Approximation Algorithms for Machine Learning}},
year = {2011}
}
@inproceedings{lee2003video-based,
author = {Lee, Kuang-Chih and Ho, Jeffrey and Yang, Ming-Hsuan and Kriegman, David},
booktitle = {Proceedings of the IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
pages = {313--320},
title = {{Video-Based Face Recognition Using Probabilistic Appearance Manifolds}},
year = {2003}
}
@inproceedings{yu2005blockwise,
author = {Yu, Kai and Yu, Shipeng},
booktitle = {Proc. of the 22nd ICML Workshop on Learning},
title = {{Blockwise supervised inference on large graphs}},
year = {2005}
}
@inproceedings{minka2001expectation,
abstract = {This paper presents a new deterministic approximation technique in Bayesian networks. This method, "Expectation Propagation", unifies two previous techniques: assumed-density filtering, an extension of the Kalman filter, and loopy belief propagation, an extension of belief propagation in Bayesian networks. All three algorithms try to recover an approximate distribution which is close in KL divergence to the true distribution. Loopy belief propagation, because it propagates exact belief states, is useful for a limited class of belief networks, such as those which are purely discrete. Expectation Propagation approximates the belief states by only retaining certain expectations, such as mean and variance, and iterates until these expectations are consistent throughout the network. This makes it applicable to hybrid networks with discrete and continuous nodes. Expectation Propagation also extends belief propagation in the opposite direction - it can propagate richer belief states that incorporate correlations between nodes. Experiments with Gaussian mixture models show Expectation Propagation to be convincingly better than methods with similar computational cost: Laplace's method, variational Bayes, and Monte Carlo. Expectation Propagation also provides an efficient algorithm for training Bayes point machine classifiers.},
author = {Minka, Tom},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Expectation propagation for approximate Bayesian inference}},
year = {2001}
}
@article{steinwart2005classification,
address = {Cambridge, MA, USA},
annote = {comps{\_}anX},
author = {Steinwart, Ingo and Hush, Don and Scovel, Clint},
issn = {1533-7928},
journal = {Journal of Machine Learning Research},
pages = {211--232},
publisher = {MIT Press},
title = {{A Classification Framework for Anomaly Detection}},
url = {http://jmlr.csail.mit.edu/papers/volume6/steinwart05a/steinwart05a.pdf},
volume = {6},
year = {2005}
}
@article{kempe2015maximizing,
author = {Kempe, David and Kleinberg, Jon and Tardos, {\'{E}}va},
journal = {Theory of Computing},
number = {4},
pages = {105--147},
title = {{Maximizing the spread of influence through a social network}},
volume = {11},
year = {2015}
}
@inproceedings{blum1996on-line,
author = {Blum, Avrim},
booktitle = {Online Algorithms},
pages = {306--325},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{On-line Algorithms in Machine Learning}},
volume = {1442},
year = {1996}
}
@book{bertsekas1999nonlinear,
address = {Belmont, MA},
author = {Bertsekas, Dimitri},
publisher = {Athena Scientific},
title = {{Nonlinear Programming}},
year = {1999}
}
@book{Edmonds1970,
author = {Edmonds, J},
booktitle = {Combinatorial Structures and Their Applications},
pages = {69--87},
publisher = {New York: Gordon and Breach},
title = {{Submodular functions, matroids, and certain polyhedra}},
year = {1970}
}
@inproceedings{poupart2002piecewise,
author = {Poupart, Pascal and Boutilier, Craig and Patrascu, Relu and Schuurmans, Dale and Guestrin, Carlos},
booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence},
pages = {292--299},
title = {{Greedy Linear Value-Approximation for Factored {Markov} Decision Processes}},
year = {2002}
}
@inproceedings{tewari08optimistic,
author = {Tewari, Ambuj and Bartlett, Peter L},
pages = {1505--1512},
title = {{Optimistic Linear Programming gives Logarithmic Regret for Irreducible {MDPs}}}
}
@article{feige1998threshold,
author = {Feige, U},
journal = {Journal of the ACM (JACM)},
number = {4},
pages = {634--652},
publisher = {ACM},
title = {{A threshold of {$\ln n$} for approximating set cover}},
volume = {45},
year = {1998}
}
@article{nagamochi1998note,
author = {Nagamochi, H and Ibaraki, T},
journal = {Information Processing Letters},
number = {5},
pages = {239--244},
publisher = {Elsevier},
title = {{A note on minimizing submodular functions}},
volume = {67},
year = {1998}
}
@inproceedings{LV06,
author = {Lovasz, L and Vempala, S},
booktitle = {Proceedings of the 47th Annual IEEE Symposium on Foundations of Computer Science (FOCS)},
pages = {57--68},
title = {{Fast algorithms for logconcave functions: sampling, rounding, integration and optimization}},
year = {2006}
}
@article{gomez2003immuno-fuzzy,
author = {Gomez, J and Gonzalez, F and Dasgupta, D},
doi = {10.1109/FUZZ.2003.1206605},
journal = {Fuzzy Systems, 2003. FUZZ '03. The 12th IEEE International Conference on},
keywords = {fuzzy logic,fuzzy rules,fuzzy set theory,immuno fuzzy approach,real data sets,security of data anomaly detection,synthetic sets},
month = {may},
pages = {1219--1224},
title = {{An immuno-fuzzy approach to anomaly detection}},
volume = {2},
year = {2003}
}
@incollection{boularias2012algorithms,
author = {Boularias, Abdeslam and Kroemer, Oliver and Peters, Jan},
booktitle = {Advances in Neural Information Processing Systems 25},
editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q},
pages = {2186--2194},
title = {{Algorithms for Learning Markov Field Policies}},
url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}1084.pdf},
year = {2012}
}
@article{erdos1959on,
abstract = {P. Erd{\H{o}}s, A. R{\'{e}}nyi. Publ. Math. Debrecen, Vol. 6 (1959), pp. 290--297. bibtex-import network.},
author = {Erd{\H{o}}s, Paul and R{\'{e}}nyi, Alfr{\'{e}}d},
journal = {Publicationes Mathematicae},
pages = {290--297},
title = {{On random graphs}},
volume = {6},
year = {1959}
}
@article{zhang2015divide,
abstract = {We establish optimal convergence rates for a decomposition-based scalable approach to kernel ridge regression. The method is simple to describe: it randomly partitions a dataset of size N into m subsets of equal size, computes an independent kernel ridge regression estimator for each subset, then averages the local solutions into a global predictor. This partitioning leads to a substantial reduction in computation time versus the standard approach of performing kernel ridge regression on all N samples. Our two main theorems establish that despite the computational speed-up, statistical optimality is retained: as long as m is not too large, the partition-based estimator achieves the statistical minimax rate over all estimators using the set of N samples. As concrete examples, our theory guarantees that the number of processors m may grow nearly linearly for finite-rank kernels and Gaussian kernels and polynomially in N for Sobolev spaces, which in turn allows for substantial reductions in computational cost. We conclude with experiments on both simulated data and a music-prediction task that complement our theoretical results, exhibiting the computational and statistical benefits of our approach.},
author = {Zhang, Yuchen and Duchi, John C. and Wainwright, Martin J.},
journal = {Journal of Machine Learning Research},
pages = {3299--3340},
title = {{Divide and conquer kernel ridge regression: A distributed algorithm with minimax optimal rates}},
volume = {16},
year = {2015}
}
@article{gilks1992adaptive,
abstract = {We propose a method for rejection sampling from any univariate log-concave probability density function. The method is adaptive: as sampling proceeds, the rejection envelope and the squeezing function converge to the density function. The rejection envelope and squeezing function are piece-wise exponential functions, the rejection envelope touching the density at previously sampled points, and the squeezing function forming arcs between those points of contact. The technique is intended for situations where evaluation of the density is computationally expensive, in particular for applications of Gibbs sampling to Bayesian models with non-conjugacy. We apply the technique to a Gibbs sampling analysis of monoclonal antibody reactivity.},
author = {Gilks, W. R. and Wild, P.},
journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
keywords = {adaptive rejection sampling,bayesian inference,density,gibbs sampling,log-concave,non-conjugate bayesian models,simulation},
number = {2},
pages = {337--348},
title = {{Adaptive rejection sampling for Gibbs sampling}},
volume = {41},
year = {1992}
}
@inproceedings{akoglu2010oddball:,
author = {Akoglu, Leman and McGlohon, Mary and Faloutsos, Christos},
booktitle = {Advances in Knowledge Discovery and Data Mining, 14th Pacific-Asia Conference, PAKDD 2010, Hyderabad, India, June 21-24, 2010. Proceedings. Part II},
pages = {410--421},
title = {{Oddball: Spotting Anomalies in Weighted Graphs}},
year = {2010}
}
@inproceedings{Gleich2015robustifying,
author = {Gleich, David F and Mahoney, Michael W},
booktitle = {SIGKDD},
pages = {359--368},
title = {{Using Local Spectral Methods to Robustify Graph-Based Learning Algorithms}},
year = {2015}
}
@inproceedings{jegelka2011-fast-approx-sfm,
author = {Jegelka, S and Lin, H and Bilmes, J A},
booktitle = {Adv. NIPS},
title = {{Fast Approximate Submodular Minimization}},
year = {2011}
}
@article{brafman2003r-max,
author = {Brafman, Ronen and Tennenholtz, Moshe},
journal = {Journal of Machine Learning Research},
pages = {213--231},
title = {{R-MAX -- A General Polynomial Time Algorithm for Near-Optimal Reinforcement Learning}},
volume = {3},
year = {2003}
}
@inproceedings{maillard2011finite,
author = {Maillard, Odalric-Ambrym and Munos, R{\'{e}}mi and Stoltz, Gilles},
booktitle = {Proceedings of the 24th Annual Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{Finite-Time Analysis of Multi-armed Bandits Problems with Kullback-Leibler Divergences}},
year = {2011}
}
@inproceedings{ghashami2016streaming,
abstract = {Kernel principal component analysis (KPCA) provides a concise set of basis vectors which capture non-linear structures within large data sets, and is a central tool in data analysis and learning. To allow for non-linear relations, typically a full {\$}n \backslashtimes n{\$} kernel matrix is constructed over {\$}n{\$} data points, but this requires too much space and time for large values of {\$}n{\$}. Techniques such as the Nystr$\backslash$"om method and random feature maps can help towards this goal, but they do not explicitly maintain the basis vectors in a stream and take more space than desired. We propose a new approach for streaming KPCA which maintains a small set of basis elements in a stream, requiring space only logarithmic in {\$}n{\$}, and also improves the dependence on the error parameter. Our technique combines together random feature maps with recent advances in matrix sketching, it has guaranteed spectral norm error bounds with respect to the original kernel matrix, and it compares favorably in practice to state-of-the-art approaches.},
author = {Ghashami, Mina and Perry, Daniel and Phillips, Jeff M.},
booktitle = {International Conference on Artificial Intelligence and Statistics},
title = {{Streaming kernel principal component analysis}},
year = {2016}
}
@article{brodley1999identifying,
author = {Brodley, Carla E and Friedl, Mark A},
journal = {J. Artif. Intell. Res. (JAIR)},
pages = {131--167},
title = {{Identifying Mislabeled Training Data}},
volume = {11},
year = {1999}
}
@inproceedings{yan2009fast,
author = {Yan, Donghui and Huang, Ling and Jordan, Michael},
booktitle = {Proceedings of the 15th ACM SIGKDD Conference on Knowledge Discovery and Data Mining},
title = {{Fast Approximate Spectral Clustering}},
year = {2009}
}
@article{mallows,
author = {Mallows, C L},
journal = {Technometrics},
number = {4},
pages = {661--675},
title = {{Some comments on {$C_p$}}},
volume = {15},
year = {1973}
}
@book{VW95,
author = {van der Vaart, A and Wellner, J},
publisher = {Springer},
title = {{Weak Convergence and Empirical Processes}},
year = {1995}
}
@inproceedings{cortes2008stability,
author = {Cortes, Corinna and Mohri, Mehryar and Pechyony, Dmitry and Rastogi, Ashish},
booktitle = {Proceedings of the 25th International Conference on Machine Learning},
pages = {176--183},
title = {{Stability of Transductive Regression Algorithms}},
year = {2008}
}
@article{koutis_solving_2011,
author = {Koutis, Ioannis and Miller, Gary L and Peng, Richard},
journal = {arXiv preprint arXiv:1102.4842},
title = {{Solving {SDD} linear systems in time {$\tilde{O}(m \log n \log(1/\epsilon))$}}},
url = {http://www.researchgate.net/profile/Richard{\_}Peng/publication/221499482{\_}A{\_}Nearly-m{\_}log{\_}n{\_}Time{\_}Solver{\_}for{\_}SDD{\_}Linear{\_}Systems/links/004635362a1ac2587f000000.pdf},
year = {2011}
}
@article{HW09,
author = {Helmbold, D P and Warmuth, M},
journal = {Journal of Machine Learning Research},
pages = {1705--1736},
title = {{Learning Permutations with Exponential Weights}},
volume = {10},
year = {2009}
}
@inproceedings{nodelman2002continuous,
author = {Nodelman, Uri and Shelton, Christian and Koller, Daphne},
booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial Intelligence},
pages = {378--387},
title = {{Continuous Time {Bayesian} Networks}},
year = {2002}
}
@book{puterman1994markov,
address = {New York, NY},
author = {Puterman, Martin L},
howpublished = {Hardcover},
isbn = {0471619779},
publisher = {John Wiley {\&} Sons},
title = {{Markov Decision Processes: Discrete Stochastic Dynamic Programming}},
url = {http://www.amazon.ca/exec/obidos/redirect?tag=citeulike09-20{\&}path=ASIN/0471619779},
year = {1994}
}
@article{dayan1994td,
author = {Dayan, Peter and Sejnowski, Terry},
journal = {Machine Learning},
pages = {295--301},
title = {{{TD}($\lambda$) Converges with Probability 1}},
volume = {14},
year = {1994}
}
@techreport{prisadnikov2014exploration,
author = {Prisadnikov, Nedyalko},
doi = {10.3929/ethz-a-010211630},
institution = {Master Thesis, ETH-Z{\"{u}}rich, Department of Computer Science},
title = {{Exploration-exploitation trade-offs via probabilistic matrix factorization}},
year = {2014}
}
@book{boyd,
author = {Boyd, S P and Vandenberghe, L},
publisher = {Cambridge University Press},
title = {{Convex Optimization}},
year = {2004}
}
@inproceedings{globerson07exponentiated,
author = {Globerson, Amir and Koo, Terry Y and Carreras, Xavier and Collins, Michael},
doi = {http://doi.acm.org/10.1145/1273496.1273535},
pages = {305--312},
title = {{Exponentiated gradient algorithms for log-linear structured prediction}}
}
@incollection{howard1984influence,
address = {Menlo Park, CA},
author = {Howard, Ronald and Matheson, James},
booktitle = {Readings on the Principles and Applications of Decision Analysis},
pages = {719--762},
publisher = {Strategic Decisions Group},
title = {{Influence Diagrams}},
volume = {2},
year = {1984}
}
@inproceedings{gramacy2003adaptive,
author = {Gramacy, Robert and Warmuth, Manfred and Brandt, Scott and Ari, Ismail},
booktitle = {Advances in Neural Information Processing Systems 15},
pages = {1465--1472},
title = {{Adaptive Caching by Refetching}},
year = {2003}
}
@inproceedings{sun2012size,
author = {Sun, Yi and Schmidhuber, J{\"{u}}rgen and Gomez, Faustino J.},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Sun, Schmidhuber, Gomez - 2012 - On the Size of the Online Kernel Sparsification Dictionary.pdf:pdf},
title = {{On the size of the online kernel sparsification dictionary}},
year = {2012}
}
@inproceedings{wagstaff2000clustering,
author = {Wagstaff, K and Cardie, C},
booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
pages = {1103--1110},
title = {{Clustering with instance-level constraints}},
url = {citeseer.ist.psu.edu/wagstaff00clustering.html},
year = {2000}
}
@inproceedings{klimt2004introducing,
abstract = {A large set of email messages, the Enron corpus, was made public during the legal investigation concerning the Enron corporation. This dataset, along with a thorough explanation of its origin, is available at http://www-2.cs.cmu.edu/{\~{}}enron/. This paper provides a brief introduction and analysis of the dataset. The raw Enron corpus contains 619,446 messages belonging to 158 users. We cleaned the corpus before this analysis by removing certain folders from each user, such as discussionthreads. These folders were present for most users, and did not appear to be used directly by the users, but rather were computer generated. Many, such as alldocuments, also contained large numbers of duplicate emails, which were already present in the users other folders. Our goal in this paper is to analyze the suitability of this corpus for exploring how to classify messages as organized by a human, so these folders would have likely been misleading.},
author = {Klimt, Bryan and Yang, Yiming},
booktitle = {Collaboration, Electronic messaging, Anti-Abuse and Spam Conference},
title = {{Introducing the Enron corpus}},
year = {2004}
}
@article{madigan2002likelihood-based,
author = {Madigan, David and Raghavan, Ini and Dumouchel, William and Nason, Martha and Posse, Christian and Ridgeway, Greg},
journal = {Data Mining and Knowledge Discovery},
number = {2},
pages = {173--190},
title = {{Likelihood-based data squashing: a modeling approach to instance construction}},
volume = {6},
year = {2002}
}
@article{auer2002adaptive,
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Gentile, Claudio},
journal = {Journal of Computer and System Sciences},
pages = {48--75},
title = {{Adaptive and self-confident on-line learning algorithms}},
volume = {64},
year = {2002}
}
@article{bates2003improving,
author = {Bates, David W and Gawande, Atul A},
journal = {New England Journal of Medicine},
number = {25},
pages = {2526--2534},
title = {{Improving Safety with Information Technology}},
volume = {348},
year = {2003}
}
@article{abernethy2009beating,
author = {Abernethy, Jacob Duncan and Rakhlin, A},
doi = {10.1109/ITA.2009.5044958},
isbn = {9781424439904},
journal = {2009 Information Theory and Applications Workshop},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {280--289},
publisher = {IEEE},
title = {{Beating the adaptive bandit with high probability}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5044958},
year = {2009}
}
@article{hoefling910path,
author = {Hoefling, H},
journal = {Journal of Computational and Graphical Statistics},
number = {4},
pages = {984--1006},
title = {{A path algorithm for the fused {Lasso} signal approximator}},
volume = {19},
year = {2010}
}
@inproceedings{EvDaKaMa04,
author = {Even-Dar, E and Kakade, S M and Mansour, Y},
pages = {401--408},
title = {{Experts in a {Markov} Decision Process}}
}
@inproceedings{SS07,
author = {Shalev-Shwartz, S and Singer, Y},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
title = {{Convex Repeated Games and Fenchel Duality}},
year = {2007}
}
@article{burges1998tutorial,
author = {Burges, Christopher J C},
journal = {Data Mining and Knowledge Discovery},
number = {2},
pages = {121--167},
title = {{A Tutorial on Support Vector Machines for Pattern Recognition}},
url = {citeseer.ist.psu.edu/burges98tutorial.html},
volume = {2},
year = {1998}
}
@article{freund99perceptron,
address = {Hingham, MA, USA},
author = {Freund, Yoav and Schapire, Robert E},
doi = {http://dx.doi.org/10.1023/A:1007662407062},
issn = {0885-6125},
journal = {Machine Learning},
number = {3},
pages = {277--296},
publisher = {Kluwer Academic Publishers},
title = {{Large Margin Classification Using the Perceptron Algorithm}},
volume = {37},
year = {1999}
}
@article{Gopalan2013,
abstract = {We consider stochastic multi-armed bandit problems with complex actions over a set of basic arms, where the decision maker plays a complex action rather than a basic arm in each round. The reward of the complex action is some function of the basic arms' rewards, and the feedback observed may not necessarily be the reward per-arm. For instance, when the complex actions are subsets of the arms, we may only observe the maximum reward over the chosen subset. Thus, feedback across complex actions may be coupled due to the nature of the reward function. We prove a frequentist regret bound for Thompson sampling in a very general setting involving parameter, action and observation spaces and a likelihood function over them. The bound holds for discretely-supported priors over the parameter space and without additional structural properties such as closed-form posteriors, conjugate prior structure or independence across arms. The regret bound scales logarithmically with time but, more importantly, with an improved constant that non-trivially captures the coupling across complex actions due to the structure of the rewards. As applications, we derive improved regret bounds for classes of complex bandit problems involving selecting subsets of arms, including the first nontrivial regret bounds for nonlinear MAX reward feedback from subsets.},
archivePrefix = {arXiv},
arxivId = {1311.0466},
author = {Gopalan, Aditya and Mannor, Shie and Mansour, Yishay},
eprint = {1311.0466},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Gopalan, Mannor, Mansour - 2013 - Thompson Sampling for Complex Bandit Problems(2).pdf:pdf},
month = {nov},
title = {{Thompson Sampling for Complex Bandit Problems}},
url = {http://arxiv.org/abs/1311.0466},
year = {2013}
}
@article{badanidiyuru2014resourceful,
abstract = {We study contextual bandits with ancillary constraints on resources, which are common in real-world applications such as choosing ads or dynamic pricing of items. We design the first algorithm for solving these problems, and prove a regret guarantee with near-optimal statistical properties.},
archivePrefix = {arXiv},
arxivId = {arXiv:1402.6779v2},
author = {Badanidiyuru, A and Langford, John and Slivkins, Aleksandrs},
eprint = {arXiv:1402.6779v2},
journal = {arXiv preprint arXiv:1402.6779},
pages = {1--22},
title = {{Resourceful Contextual Bandits}},
url = {http://arxiv.org/abs/1402.6779},
year = {2014}
}
@article{hanley1982meaning,
abstract = {A representation and interpretation of the area under a receiver operating
characteristic (ROC) curve obtained by the "rating" method, or by
mathematical predictions based on patient characteristics, is presented.
It is shown that in such a setting the area represents the probability
that a randomly chosen diseased subject is (correctly) rated or ranked
with greater suspicion than a randomly chosen non-diseased subject.
Moreover, this probability of a correct ranking is the same quantity
that is estimated by the already well-studied nonparametric Wilcoxon
statistic. These two relationships are exploited to (a) provide rapid
closed-form expressions for the approximate magnitude of the sampling
variability, i.e., standard error that one uses to accompany the
area under a smoothed ROC curve, (b) guide in determining the size
of the sample required to provide a sufficiently reliable estimate
of this area, and (c) determine how large sample sizes should be
to ensure that one can statistically detect differences in the accuracy
of diagnostic techniques.},
author = {Hanley, J A and McNeil, B J},
issn = {0033-8419},
journal = {Radiology},
keywords = {auc,auroc,hanley,mcneil,roc},
month = {apr},
number = {1},
pages = {29--36},
title = {{The meaning and use of the area under a receiver operating characteristic (ROC) curve.}},
volume = {143},
year = {1982}
}
@inproceedings{flaxman2005online,
abstract = {We consider the general online convex optimization framework introduced by Zinkevich. In this setting, there is a sequence of convex functions. Each period, we must choose a single point (from some feasible set) and pay a cost equal to the value of the next function on our chosen point. Zinkevich shows that, if each function is revealed after the choice is made, then one can achieve vanishingly small regret relative to the best single decision chosen in hindsight. We extend this to the bandit setting where we do not find out the entire functions but rather just their value at our chosen point. We show how to get vanishingly small regret in this setting. Our approach uses a simple approximation of the gradient that is computed from evaluating a function at a single (random) point. We show that this estimate is sufficient to mimic Zinkevich's gradient descent online analysis, with access to the gradient (only being able to evaluate the function at a single point).},
author = {Flaxman, Abraham D and Kalai, Adam Tauman and McMahan, H Brendan},
booktitle = {Proceedings of the 16th annual ACM-SIAM Symposium On Discrete Algorithms},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Flaxman, Kalai, McMahan - 2004 - Online convex optimization in the bandit setting gradient descent without a gradient.pdf:pdf},
keywords = {bandits},
mendeley-tags = {bandits},
number = {x},
organization = {Carnegie Mellon University},
pages = {385--394},
publisher = {SIAM},
series = {SODA '05},
title = {{Online convex optimization in the bandit setting: gradient descent without a gradient}},
url = {http://arxiv.org/abs/cs/0408007},
year = {2005}
}
@inproceedings{DBLP:conf/icml/2010,
booktitle = {ICML},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
publisher = {Omnipress},
title = {{Proceedings of the 27th International Conference on Machine Learning (ICML-10), June 21-24, 2010, Haifa, Israel}},
year = {2010}
}
@inproceedings{eaton2007bayesian,
annote = {comps{\_}models},
author = {Eaton, D and Murphy, K},
booktitle = {Proceedings of the 23rd Annual Conference on Uncertainty in Artificial Intelligence (UAI-07)},
title = {{{Bayesian} structure learning using dynamic programming and {MCMC}}},
url = {http://www.cs.ubc.ca/{~}murphyk/Papers/eaton-uai07.pdf},
year = {2007}
}
@inproceedings{carvalho06voting,
address = {New York, NY, USA},
author = {Carvalho, Vitor R and Cohen, William W},
booktitle = {KDD '06},
pages = {548--553},
publisher = {ACM},
title = {{Single-pass online learning: performance, voting schemes and online feature selection}},
year = {2006}
}
@article{zivny2009expressive,
author = {{\v{Z}}ivn{\'{y}}, S and Cohen, D A and Jeavons, P G},
journal = {Discrete Applied Mathematics},
number = {15},
pages = {3347--3358},
publisher = {Elsevier},
title = {{The expressive power of binary submodular functions}},
volume = {157},
year = {2009}
}
@book{soille,
author = {Soille, P},
publisher = {Springer},
title = {{Morphological Image Analysis: Principles and Applications}},
year = {2003}
}
@inproceedings{syed2007mwal,
author = {Syed, Umar and Schapire, Robert},
pages = {1449--1456},
title = {{A Game-Theoretic Approach to Apprenticeship Learning}}
}
@techreport{wainwright2003graphical,
abstract = {The formalism of probabilistic graphical models provides a unifying
framework for the development of large-scale multivariate statistical
models. Graphical models have become a focus of research in many
applied statistical and computational fields, including bioinformatics,
information theory, signal and image processing, information retrieval
and machine learning. Many problems that arise in specific instances---including
the key problems of computing marginals and modes of probability
distributions---are best studied in the general setting. Working with
exponential family representations, and exploiting the conjugate
duality between the cumulant generating function and the entropy
for exponential families, we develop general variational representations
of the problems of computing marginal probabilities and modes. We
describe how a wide variety of known computational algorithms---including
mean field methods and cluster variational techniques---can be understood
in terms of approximations of these variational representations.
We also present novel convex relaxations based on the variational
framework. The variational approach provides a complementary alternative
to Markov chain Monte Carlo as a general source of approximation
methods for inference in large-scale statistical models.},
annote = {comps{\_}models},
author = {Wainwright, Martin J and Jordan, Michael I},
number = {649},
institution = {Department of Statistics, University of California, Berkeley},
keywords = {duality},
month = {sep},
title = {{Graphical models, exponential families, and variational inference}},
url = {http://www.eecs.berkeley.edu/{~}wainwrig/Papers/WaiJorVariational03.pdf},
year = {2003}
}
@misc{berkeley-parser,
annote = {http://nlp.cs.berkeley.edu/Main.html{\#}Parsing},
author = {Petrov, Slav},
publisher = {University of California},
title = {{Berkeley Parser}},
year = {2007}
}
@article{hannan1957approximation,
author = {Hannan, James},
journal = {Contributions to the theory of games},
pages = {97--139},
title = {{Approximation to Bayes risk in repeated play}},
volume = {3},
year = {1957}
}
@article{schweitzer1985generalized,
author = {Schweitzer, Paul and Seidmann, Abraham},
journal = {Journal of Mathematical Analysis and Applications},
pages = {568--582},
title = {{Generalized Polynomial Approximations in {Markovian} Decision Processes}},
volume = {110},
year = {1985}
}
@article{spielman_graph_2011,
author = {Spielman, Daniel A and Srivastava, Nikhil},
journal = {Journal on Computing},
number = {6},
pages = {1913--1926},
title = {{Graph sparsification by effective resistances}},
volume = {40},
year = {2011}
}
@article{hart2011identification,
abstract = {The purpose of this study was to provide a univariate and multivariate analysis of genomic microbial data and salivary mass-spectrometry proteomic profiles for dental caries outcomes. In order to determine potential useful biomarkers for dental caries, a multivariate classification analysis was employed to build predictive models capable of classifying microbial and salivary sample profiles with generalization performance. We used high-throughput methodologies including multiplexed microbial arrays and SELDI-TOF-MS profiling to characterize the oral flora and salivary proteome in 204 children aged 1-8 years (n = 118 caries-free, n = 86 caries-active). The population received little dental care and was deemed at high risk for childhood caries. Findings of the study indicate that models incorporating both microbial and proteomic data are superior to models of only microbial or salivary data alone. Comparison of results for the combined and independent data suggests that the combination of proteomic and microbial sources is beneficial for the classification accuracy and that combined data lead to improved predictive models for caries-active and caries-free patients. The best predictive model had a 6{\%} test error, {\textgreater}92{\%} sensitivity, and {\textgreater}95{\%} specificity. These findings suggest that further characterization of the oral microflora and the salivary proteome associated with health and caries may provide clinically useful biomarkers to better predict future caries experience.},
author = {Hart, Thomas C and Corby, Patricia M and Hauskrecht, Milos and {Hee Ryu}, Ok and Pelikan, Richard and Valko, Michal and Oliveira, Maria B and Hoehn, Gerald T and Bretz, Walter A},
doi = {10.1155/2011/196721},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Hart et al. - 2011 - Identification of microbial and proteomic biomarkers in early childhood caries.pdf:pdf},
institution = {Department of Periodontics, College of Dentistry, University of Illinois at Chicago, 801 S. Paulina Street, Chicago, IL 60612, USA.},
journal = {Int J Dent},
keywords = {misovalko},
mendeley-tags = {misovalko},
pages = {196721},
pmid = {22013442},
title = {{Identification of microbial and proteomic biomarkers in early childhood caries.}},
url = {http://dx.doi.org/10.1155/2011/196721},
volume = {2011},
year = {2011}
}
@inproceedings{goldberg2011oasis:,
author = {Goldberg, Andrew and Zhu, Xiaojin and Furger, Alex and Xu, Jun-Ming},
booktitle = {Proceedings of the Twenty-Fifth AAAI Conference on Artificial Intelligence},
title = {{OASIS: Online active semisupervised learning}},
year = {2011}
}
@misc{TheMendeleySupportTeam2011b,
abstract = {A quick introduction to Mendeley. Learn how Mendeley creates your personal digital library, how to organize and annotate documents, how to collaborate and share with colleagues, and how to generate citations and bibliographies.},
address = {London},
author = {{The Mendeley Support Team}},
booktitle = {Mendeley Desktop},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/The Mendeley Support Team - 2011 - Getting Started with Mendeley.pdf:pdf},
keywords = {Mendeley,how-to,user manual},
pages = {1--16},
publisher = {Mendeley Ltd.},
title = {{Getting Started with Mendeley}},
url = {http://www.mendeley.com},
year = {2011}
}
@incollection{oki2012glpk,
abstract = {The GLPK (GNU Linear Programming Kit) package is intended for solving large-scale linear programming (LP), mixed integer programming (MIP), and other related problems. It is a set of routines written in ANSI C and organized in the form of a callable library.},
author = {Oki, Eiji},
booktitle = {Linear Programming and Algorithms for Communication Networks - A Practical Guide to Network Design, Control, and Management},
title = {{GNU Linear Programming Kit, Version 4.61}},
url = {http://www.gnu.org/software/glpk/},
year = {2012}
}
@inproceedings{bach2010structured,
author = {Bach, F},
booktitle = {Adv. NIPS},
title = {{Structured sparsity-inducing norms through submodular functions}},
year = {2010}
}
@article{hochbaum1995strongly,
author = {Hochbaum, D S and Hong, S P},
journal = {Mathematical Programming},
number = {1},
pages = {269--309},
publisher = {Springer},
title = {{About strongly polynomial time algorithms for quadratic optimization over submodular constraints}},
volume = {69},
year = {1995}
}
@book{Roc70,
author = {Rockafellar, R},
publisher = {Princeton University Press},
title = {{Convex Analysis}},
year = {1970}
}
@article{seeger2008bayesian,
author = {Seeger, M W},
journal = {Journal of Machine Learning Research},
pages = {759--813},
publisher = {JMLR. org},
title = {{Bayesian inference and optimal design for the sparse linear model}},
volume = {9},
year = {2008}
}
@inproceedings{shalev2007pegasos,
author = {Shalev-Shwartz, S and Singer, Y and Srebro, N},
booktitle = {Proc. ICML},
title = {{Pegasos: Primal estimated sub-gradient solver for svm}},
year = {2007}
}
@book{pearl1988probabilistic,
address = {San Francisco, CA, USA},
author = {Pearl, Judea},
isbn = {0-934613-73-7},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Probabilistic reasoning in intelligent systems: networks of plausible inference}},
year = {1988}
}
@article{KW52,
author = {Kiefer, J and Wolfowitz, J},
journal = {Annals of Mathematical Statistics},
pages = {462--466},
title = {{Stochastic estimation of the maximum of a regression function}},
volume = {23},
year = {1952}
}
@article{daniel1973stability,
author = {Daniel, James},
journal = {Mathematical Programming},
pages = {41--53},
title = {{Stability of the solution of definite quadratic programs}},
volume = {5},
year = {1973}
}
@inproceedings{zhang2005learning-based,
address = {New York, NY, USA},
author = {Zhang, Jian and Rexford, Jennifer and Feigenbaum, Joan},
booktitle = {MineNet '05: Proceedings of the 2005 ACM SIGCOMM workshop on Mining network data},
doi = {http://doi.acm.org/10.1145/1080173.1080189},
isbn = {1-59593-026-4},
pages = {219--220},
publisher = {ACM},
title = {{Learning-based anomaly detection in BGP updates}},
year = {2005}
}
@inproceedings{kocak2016online,
abstract = {We propose a new partial-observability model for online learning problems where the learner, besides its own loss, also observes some noisy feedback about the other actions, depending on the underlying structure of the problem. We represent this structure by a weighted directed graph, where the edge weights are related to the quality of the feedback shared by the connected nodes. Our main contribution is an efficient algorithm that guarantees a regret of O(sqrt(alpha{\^{}}* T) after T rounds, where alpha{\^{}}* is a novel graph property that we call the effective independence number. Our algorithm is completely parameter-free and does not require knowledge (or even estimation) of alpha{\^{}}*. For the special case of binary edge weights, our setting reduces to the partial-observability models of Mannor {\&} Shamir (2011) and Alon et al. (2013) and our algorithm recovers the near-optimal regret bounds.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal},
booktitle = {International Conference on Artificial Intelligence and Statistics},
title = {{Online learning with noisy side observations}},
year = {2016}
}
@inproceedings{zhu2009some,
address = {Piscataway, NJ, USA},
author = {Zhu, Xiaojin and Goldberg, Andrew B and Khot, Tushar},
booktitle = {ICME'09: Proceedings of the 2009 IEEE international conference on Multimedia and Expo},
isbn = {978-1-4244-4290-4},
pages = {1504--1507},
publisher = {IEEE Press},
title = {{Some new directions in graph-based semi-supervised learning}},
year = {2009}
}
@inproceedings{karnin2013almost,
author = {Karnin, Zohar and Koren, Tomer and Somekh, Oren},
booktitle = {International Conference on Machine Learning},
title = {{Almost optimal exploration in multi-armed bandits}},
year = {2013}
}
@article{Vov99,
author = {Vovk, V},
journal = {Machine Learning},
number = {3},
pages = {247--282},
title = {{Derandomizing stochastic prediction strategies}},
volume = {35},
year = {1999}
}
@book{golub83matrix,
author = {Golub, G H and Van Loan, C F},
publisher = {Johns Hopkins University Press},
title = {{Matrix Computations}},
year = {1996}
}
@article{matus1995extreme,
author = {Matus, F},
journal = {Discrete Mathematics},
number = {1},
pages = {177--192},
publisher = {Citeseer},
title = {{Extreme convex set functions with many nonnegative differences}},
volume = {135},
year = {1995}
}
@incollection{ORVR13,
author = {Osband, Ian and Russo, Dan and {Van Roy}, Benjamin},
booktitle = {Advances in Neural Information Processing Systems 26},
editor = {Burges, C J C and Bottou, L and Welling, M and Ghahramani, Z and Weinberger, K Q},
pages = {3003--3011},
title = {{(More) Efficient Reinforcement Learning via Posterior Sampling}},
year = {2013}
}
@inproceedings{wipf,
author = {Wipf, D and Nagarajan, S},
booktitle = {Adv. NIPS 22},
title = {{Sparse Estimation Using General Likelihoods and Non-Factorial Priors}},
year = {2009}
}
@article{EKMW08,
author = {Even-Dar, Eyal and Kearns, Michael and Mansour, Yishay and Wortman, Jennifer},
journal = {Machine Learning},
number = {1-2},
pages = {21--37},
title = {{Regret to the best vs. regret to the average}},
volume = {72},
year = {2008}
}
@article{cortez2009,
author = {Cortez, P and Cerdeira, A and Almeida, F and Matos, T and Reis, J},
journal = {Decision Support Systems},
pages = {547--553},
publisher = {Elsevier},
title = {{Modeling wine preferences by data mining from physicochemical properties}},
volume = {47},
year = {2009}
}
@inproceedings{low2010graphlab:,
address = {Catalina Island, California},
author = {Low, Yucheng and Gonzalez, Joseph and Kyrola, Aapo and Bickson, Danny and Guestrin, Carlos and Hellerstein, Joseph M},
booktitle = {Conference on Uncertainty in Artificial Intelligence (UAI)},
month = {jul},
title = {{GraphLab: A New Parallel Framework for Machine Learning}},
year = {2010}
}
@inproceedings{viswanath2009evolution,
abstract = {Online social networks have become extremely popular; numerous sites allow users to interact and share content using social links. Users of these networks often establish hundreds to even thousands of social links with other users. Recently, researchers have suggested examining the activity network— a network that is based on the actual interaction between users, rather than mere friendship—to distinguish between strong and weak links. While initial studies have led to in- sights on how an activity network is structurally different from the social network itself, a natural and important aspect of the activity network has been disregarded: the fact that over time social links can grow stronger or weaker. In this paper, we study the evolution of activity between users in the Facebook social network to capture this notion. We find that links in the activity network tend to come and go rapidly over time, and the strength of ties exhibits a general decreasing trend of activity as the social network link ages. For example, only 30{\%} of Facebook user pairs interact consistently from one month to the next. Interestingly, we also find that even though the links of the activity network change rapidly over time, many graph-theoretic properties of the activity network remain unchanged.},
author = {Viswanath, Bimal and Mislove, Alan and Cha, Meeyoung and Gummadi, Krishna P.},
booktitle = {ACM Workshop on Online Social Networks},
title = {{On the evolution of user interaction in {Facebook}}},
year = {2009}
}
@article{tesauro1995temporal,
author = {Tesauro, Gerald},
journal = {Communications of the ACM},
number = {3},
pages = {58--68},
title = {{Temporal Difference Learning and {TD-Gammon}}},
volume = {38},
year = {1995}
}
@inproceedings{ng2000algorithms,
abstract = {This paper addresses the problem of inverse reinforcement learning (IRL) in Markov decision processes, that is, the problem of extracting a reward function given observed, optimal behaviour. IRL may be useful for apprenticeship learning to acquire skilled behaviour, and for ascertaining the reward function being optimized by a natural system. We rst characterize the set of all reward functions for which a given policy is optimal. We then derive three algorithms for IRL. The rst two deal with the case where the entire policy is known; we handle tabulated reward functions on a nite state space and linear functional approximation of the reward function over a potentially in- nite state space. The third algorithm deals with the more realistic case in which the policy is known only through a nite set of observed trajectories. In all cases, a key issue is degeneracythe existence of a large set of reward functions for which the observed policy is optimal. To remove...},
author = {Ng, Andrew and Russell, Stuart},
booktitle = {Proceedings of the Seventeenth International Conference on Machine Learning},
doi = {10.2460/ajvr.67.2.323},
editor = {{De Sousa}, Jorge Pinho},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ng, Russell - 2000 - Algorithms for inverse reinforcement learning.pdf:pdf},
issn = {00029645},
pages = {663--670},
pmid = {16454640},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Algorithms for inverse reinforcement learning}},
url = {http://www-cs.stanford.edu/people/ang/papers/icml00-irl.pdf},
year = {2000}
}
@article{NJLS09,
author = {Nemirovski, A and Juditsky, A and Lan, G and Shapiro, A},
journal = {SIAM Journal on Optimization},
pages = {1574--1609},
title = {{Robust stochastic approximation approach to stochastic programming}},
volume = {19},
year = {2009}
}
@inproceedings{chawla2003smoteboost:,
author = {Chawla, Nitesh V and Lazarevic, Aleksandar and Hall, Lawrence O and Bowyer, Kevin W},
booktitle = {PKDD},
pages = {107--119},
title = {{SMOTEBoost: Improving Prediction of the Minority Class in Boosting.}},
year = {2003}
}
@inproceedings{xing2003generalized,
annote = {comps{\_}models},
author = {Xing, Eric P and Jordan, Michael I and Russell, Stuart J},
booktitle = {Proceedings of UAI},
pages = {583--591},
title = {{A generalized mean field algorithm for variational inference in exponential families}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.6058},
year = {2003}
}
@inproceedings{hazan2011beyond,
author = {Hazan, Elad and Kale, Satyen},
booktitle = {Conference on Learning Theory},
title = {{Beyond the regret minimization barrier: an optimal algorithm for stochastic strongly-convex optimization.}},
year = {2011}
}
@article{boykov2001fast,
author = {Boykov, Y and Veksler, O and Zabih, R},
journal = {IEEE Trans. PAMI},
number = {11},
pages = {1222--1239},
title = {{Fast approximate energy minimization via graph cuts}},
volume = {23},
year = {2001}
}
@techreport{Nes11,
author = {Nesterov, Y},
institution = {Universit{\'{e}} catholique de Louvain, Center for Operations Research and Econometrics (CORE)},
title = {{Random gradient-free minimization of convex functions}},
type = {CORE Discussion Papers},
year = {2011}
}
@article{cunningham1984testing,
author = {Cunningham, W H},
journal = {Journal of Combinatorial Theory, Series B},
number = {2},
pages = {161--188},
publisher = {Elsevier},
title = {{Testing membership in matroid polyhedra}},
volume = {36},
year = {1984}
}
@article{arandjelovic2009methodology,
author = {Arandjelovic, Ognjen and Cipolla, Roberto},
journal = {Computer Vision and Image Understanding},
number = {2},
pages = {159--171},
title = {{A Methodology for Rapid Illumination-Invariant Face Recognition using Image Processing Filters}},
volume = {113},
year = {2009}
}
@incollection{choi2012nonparametric,
author = {Choi, Jaedeug and Kim, Kee-Eung},
booktitle = {Advances in Neural Information Processing Systems 25},
editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q},
pages = {314--322},
title = {{Nonparametric Bayesian Inverse Reinforcement Learning for Multiple Reward Functions}},
url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}0159.pdf},
year = {2012}
}
@inproceedings{carpentier2016revealing,
abstract = {We study a graph bandit setting where the objective of the learner is to detect the most influential node of a graph by requesting as little information from the graph as possible. One of the relevant applications for this setting is marketing in social networks, where the marketer aims at finding and taking advantage of the most influential customers. The existing approaches for bandit problems on graphs require either partial or complete knowledge of the graph. In this paper, we do not assume any knowledge of the graph, but we consider a setting where it can be gradually discovered in a sequential and active way. At each round, the learner chooses a node of the graph and the only information it receives is a stochastic set of the nodes that the chosen node is currently influencing. To address this setting, we propose BARE, a bandit strategy for which we prove a regret guarantee that scales with the detectable dimension, a problem dependent quantity that is often much smaller than the number of nodes.},
author = {Carpentier, Alexandra and Valko, Michal},
booktitle = {International Conference on Artificial Intelligence and Statistics},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Carpentier, Valko - 2016 - Revealing graph bandits for maximizing local influence.pdf:pdf},
title = {{Revealing graph bandits for maximizing local influence}},
year = {2016}
}
@inproceedings{azar2014online,
abstract = {In this paper we consider the problem of online stochastic optimization of a locally smooth function under bandit feedback. We introduce the high-confidence tree (HCT) algorithm, a novel any-time $\mathcal{X}$-armed bandit algorithm, and derive regret bounds matching the performance of existing state-of-the-art in terms of dependency on number of steps and smoothness factor. The main advantage of HCT is that it handles the challenging case of correlated rewards, whereas existing methods require that the reward-generating process of each arm is an identically and independent distributed (iid) random process. HCT also improves on the state-of-the-art in terms of its memory requirement as well as requiring a weaker smoothness assumption on the mean-reward function in compare to the previous anytime algorithms. Finally, we discuss how HCT can be applied to the problem of policy search in reinforcement learning and we report preliminary empirical results.},
author = {Azar, Mohammad Gheshlaghi and Lazaric, Alessandro and Brunskill, Emma},
booktitle = {International Conference on Machine Learning},
title = {{Online stochastic optimization under correlated bandit feedback}},
year = {2014}
}
@inproceedings{kalyanakrishnan2012pac,
author = {Kalyanakrishnan, Shivaram and Tewari, Ambuj and Auer, Peter and Stone, Peter},
booktitle = {International Conference on Machine Learning},
title = {{PAC subset selection in stochastic multi-armed bandits}},
year = {2012}
}
@book{Sch03,
author = {Schrijver, A},
publisher = {Springer},
title = {{Combinatorial Optimization}},
year = {2003}
}
@inproceedings{valko2011conditionala,
abstract = {In this paper, we consider the problem of conditional anomaly detection that aims to identify data instances with an unusual response or a class label. We develop a new non-parametric approach for conditional anomaly detection based on the soft harmonic solution, with which we estimate the confidence of the label to detect anomalous mislabeling. We further regularize the solution to avoid the detection of isolated examples and examples on the boundary of the distribution support. We demonstrate the efficacy of the proposed method on several synthetic and UCI ML datasets in detecting unusual labels when compared to several baseline approaches. We also evaluate the performance of our method on a real-world electronic health record dataset where we seek to identify unusual patient-management decisions.},
author = {Valko, Michal and Kveton, Branislav and Valizadegan, Hamed and Cooper, Gregory F and Hauskrecht, Milos},
booktitle = {Proceedings of the 2011 IEEE International Conference on Data Mining},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = {jun},
title = {{Conditional anomaly detection with soft harmonic functions}},
year = {2011}
}
@incollection{sinha2009semi,
author = {Sinha, K and Belkin, M},
booktitle = {Advances in Neural Information Processing Systems 22},
publisher = {NIPS Foundation (http://books.nips.cc)},
title = {{Semi-supervised Learning using Sparse Eigenfunction Bases}},
year = {2009}
}
@inproceedings{collins00discriminative,
author = {Collins, Michael},
booktitle = {International Conference on Machine Learning},
pages = {175--182},
title = {{Discriminative Reranking for Natural Language Parsing}},
year = {2000}
}
@inproceedings{klein03a*parsing,
address = {Morristown, NJ, USA},
author = {Klein, Dan and Manning, Christopher D},
booktitle = {NAACL '03: Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
keywords = {algorithm,nlp,parsing,viterbi},
pages = {40--47},
publisher = {Association for Computational Linguistics},
title = {{{A}* parsing: fast exact Viterbi parse selection}},
url = {http://portal.acm.org/citation.cfm?id=1073461},
year = {2003}
}
@misc{chang2001libsvm:,
annote = {Software available at http://www.csie.ntu.edu.tw/{\~{}}cjlin/libsvm},
author = {Chang, Chih-Chung and Lin, Chih-Jen},
title = {{{LIBSVM}: a library for support vector machines}},
year = {2001}
}
@incollection{simunic2002dynamic,
address = {New York, NY},
author = {Simunic, Tajana},
booktitle = {Power Aware Computing},
publisher = {Kluwer Academic Publishers},
title = {{Dynamic Management of Power Consumption}},
year = {2002}
}
@article{KW01,
author = {Kivinen, J and Warmuth, M},
journal = {Machine Learning},
pages = {301--329},
title = {{Relative loss bounds for multidimensional regression problems}},
volume = {45},
year = {2001}
}
@inproceedings{ng2000,
author = {Ng, A Y and Russell, S},
pages = {663--670},
title = {{Algorithms for Inverse Reinforcement Learning}},
url = {http://citeseer.ist.psu.edu/ng00algorithms.html}
}
@inproceedings{younes2004solving,
author = {Younes, Hakan and Simmons, Reid},
booktitle = {Proceedings of the 19th National Conference on Artificial Intelligence},
pages = {742--747},
title = {{Solving Generalized Semi-{Markov} Decision Processes Using Continuous Phase-Type Distributions}},
year = {2004}
}
@inproceedings{audiffren2014messi,
abstract = {A popular approach to apprenticeship learning (AL) is to formulate it as an inverse reinforcement learning (IRL) problem. The MaxEnt-IRL algorithm successfully integrates the maximum entropy principle into IRL and unlike its predecessors, it resolves the ambiguity arising from the fact that a possibly large number of policies could match the expert's behavior. In this paper, we study an AL setting in which in addition to the expert's trajectories, a number of unsupervised trajectories is available. We introduce MESSI, a novel algorithm that combines MaxEnt-IRL with principles coming from semi-supervised learning. In particular, MESSI integrates the unsupervised data into the MaxEnt-IRL framework using a pairwise penalty on trajectories. Empirical results in a highway driving and grid-world problems indicate that MESSI is able to take advantage of the unsupervised trajectories and improve the performance of MaxEnt-IRL.},
author = {Audiffren, Julien and Valko, Michal and Lazaric, Alessandro and Ghavamzadeh, Mohammad},
booktitle = {NIPS Workshop on Novel Trends and Applications in Reinforcement Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Audiffren et al. - 2014 - MESSI Maximum entropy semi-supervised inverse reinforcement learning.pdf:pdf},
title = {{MESSI: Maximum entropy semi-supervised inverse reinforcement learning}},
year = {2014}
}
@inproceedings{dash2002exact,
address = {San Francisco, CA, USA},
annote = {comps{\_}models},
author = {Dash, Denver and Cooper, Gregory F},
booktitle = {ICML '02: Proceedings of the Nineteenth International Conference on Machine Learning},
isbn = {1-55860-873-7},
pages = {91--98},
publisher = {Morgan Kaufmann Publishers Inc.},
title = {{Exact model averaging with naive {Bayesian} classifiers}},
url = {http://www.pittsburgh.intel-research.net/{~}dhdash//docs/icml{\_}02.pdf},
year = {2002}
}
@manual{nvidia-cusparse,
title = {{NVIDIA CUSPARSE and CUBLAS Libraries}},
url = {http://www.nvidia.com/object/cuda{\_}develop.html},
year = {2012}
}
@article{tibshirani2005sparsity,
author = {Tibshirani, R and Saunders, M and Rosset, S and Zhu, J and Knight, K},
journal = {Journal of the Royal Statistical Society. Series B, Statistical Methodology},
pages = {91--108},
title = {{Sparsity and smoothness via the fused {Lasso}}},
year = {2005}
}
@inproceedings{ortner08deterministic,
author = {Ortner, Ronald},
booktitle = {Proceedings of the 19th International Conference on Algorithmic Learning Theory, ALT 2008},
title = {{Online Regret Bounds for {Markov} Decision Processes with Deterministic Transitions}},
year = {2008}
}
@article{hein2007graph,
author = {Hein, Matthias and Audibert, Jean-Yves and von Luxburg, Ulrike},
issn = {1532-4435},
journal = {J. Mach. Learn. Res.},
month = {dec},
pages = {1325--1370},
publisher = {JMLR.org},
title = {{Graph Laplacians and their Convergence on Random Neighborhood Graphs}},
url = {http://portal.acm.org/citation.cfm?id=1314498.1314544},
volume = {8},
year = {2007}
}
@inproceedings{levine2011nonlinear,
author = {Levine, Sergey and Popovic, Zoran and Koltun, Vladlen},
booktitle = {NIPS},
isbn = {9781457700798},
pages = {1--9},
title = {{Nonlinear Inverse Reinforcement Learning with Gaussian Processes}},
url = {http://www.stanford.edu/{~}svlevine/papers/gpirl.pdf},
year = {2011}
}
@inproceedings{charpiatexhaustive,
author = {Charpiat, G},
booktitle = {Proc. CVPR},
title = {{Exhaustive Family of Energies Minimizable Exactly by a Graph Cut}},
year = {2011}
}
@inproceedings{balcan2005person,
author = {Balcan, Maria-Florina and Blum, Avrim and Choi, Patrick Pakyan and Lafferty, John and Pantano, Brian and Rwebangira, Mugizi Robert and Zhu, Xiaojin},
booktitle = {ICML 2005 Workshop on Learning with Partially Classified Training Data},
title = {{Person Identification in Webcam Images: An Application of Semi-Supervised Learning}},
year = {2005}
}
@inproceedings{black93history,
address = {Morristown, NJ, USA},
author = {Black, Ezra and Jelinek, Fred and Lafferty, John and Magerman, David M and Mercer, Robert and Roukos, Salim},
booktitle = {ACL '93: Proceedings of the 31st Annual Meeting of the Association for Computational Linguistics},
pages = {31--37},
publisher = {Association for Computational Linguistics},
title = {{Towards history-based grammars: using richer models for probabilistic parsing}},
year = {1993}
}
@article{fawcett1997adaptive,
address = {Hingham, MA, USA},
annote = {comps{\_}ano},
author = {Fawcett, Tom and Provost, Foster},
doi = {http://dx.doi.org/10.1023/A:1009700419189},
issn = {1384-5810},
journal = {Data Min. Knowl. Discov.},
number = {3},
pages = {291--316},
publisher = {Kluwer Academic Publishers},
title = {{Adaptive Fraud Detection}},
volume = {1},
year = {1997}
}
@book{horn1990matrix,
abstract = {Linear algebra and matrix theory have long been fundamental tools in mathematical disciplines as well as fertile fields for research. In this book the authors present classical and recent results of matrix analysis that have proved to be important to applied mathematics. Facts about matrices, beyond those found in an elementary linear algebra course, are needed to understand virtually any area of mathematical science, but the necessary material has appeared only sporadically in the literature and in university curricula. As interest in applied mathematics has grown, the need for a text and reference offering a broad selection of topics in matrix theory has become apparent, and this book meets that need. This volume reflects two concurrent views of matrix analysis. First, it encompasses topics in linear algebra that have arisen out of the needs of mathematical analysis. Second, it is an approach to real and complex linear algebraic problems that does not hesitate to use notions from analysis. Review and miscellanea -- Eigenvalues, eigenvectors, and similarity.},
author = {Horn, Roger A. and Johnson, Charles R.},
publisher = {Cambridge University Press},
title = {{Matrix analysis}},
year = {1990}
}
@article{watts1998collective,
abstract = {Networks of coupled dynamical systems have been used to model biological oscillators1–4 , Josephson junction arrays5,6 , excitable media7 , neural networks8–10 , spatial games11 , genetic control networks12 and many other self-organizing systems. Ordinarily, the connection topology is assumed to be either completely regular or completely random. Butmany biological, technological and social networks lie somewhere between these two extremes. Here we explore simple models of networks that can be tuned through this middle ground: regular networks ‘rewired' to intro- duce increasing amounts of disorder. We find that these systems can be highly clustered, like regular lattices, yet have small characteristic path lengths, like random graphs. We call them ‘small-world' networks, by analogy with the small-world phenomenon13,14 (popularly known as six degrees of separation15 ). The neural network of the worm Caenorhabditis elegans, the power grid of the western United States, and the collaboration graph of film actors are shown to be small-world networks. Models of dynamical systems with small-world coupling display enhanced signal-propagation speed, computational power, and synchronizability. In particular, infectious diseases spread more easily in small-world networks than in regular lattices.},
author = {Watts, Duncan J. and Strogatz, Steven H.},
journal = {Nature},
pages = {440--442},
title = {{Collective dynamics of small-world networks}},
volume = {393},
year = {1998}
}
@article{Tibshirani1996,
author = {Tibshirani, R},
journal = {Journal of the Royal Statistical Society. Series B},
pages = {267--288},
title = {{Regression shrinkage and selection via the {Lasso}}},
year = {1996}
}
@inproceedings{jamali2010matrix,
author = {Jamali, Mohsen and Ester, Martin},
booktitle = {Conference on Recommender systems},
title = {{A matrix factorization technique with trust propagation for recommendation in social networks}},
year = {2010}
}
@inproceedings{feige2007maximizing,
author = {Feige, U and Mirrokni, V S and Vondrak, J},
booktitle = {Proc. Symposium on Foundations of Computer Science},
organization = {IEEE Computer Society},
pages = {461--471},
title = {{Maximizing Non-Monotone Submodular Functions}},
year = {2007}
}
@inproceedings{EWK14,
author = {van Erven, Tim and Warmuth, Manfred and Kot{\l}owski, Wojciech},
booktitle = {Conference on Learning Theory},
pages = {949--974},
title = {{Follow the Leader with Dropout Perturbations}},
year = {2014}
}
@book{sutton1998reinforcement,
address = {Cambridge, MA},
author = {Sutton, Richard and Barto, Andrew},
publisher = {MIT Press},
title = {{Reinforcement Learning: An Introduction}},
year = {1998}
}
@inproceedings{subramanya2009large,
author = {Subramanya, Amarnag and Bilmes, Jeff},
booktitle = {Workshop on Large-Scale Machine Learning: Parallelism and Massive Datasets at Neural Information Processing Systems},
title = {{Large-Scale Graph-based Transductive Inference}},
year = {2009}
}
@article{kivinen2004online,
abstract = {Kernel-based algorithms such as support vector machines have achieved considerable success in various problems in batch setting, where all of the training data is available in advance. Support vector machines combine the so-called kernel trick with the large margin idea. There has been little use of these methods in an online setting suitable for real-time applications. In this paper, we consider online learning in a reproducing kernel Hilbert space. By considering classical stochastic gradient descent within a feature space and the use of some straightforward tricks, we develop simple and computationally efficient algorithms for a wide range of problems such as classification, regression, and novelty detection. In addition to allowing the exploitation of the kernel trick in an online setting, we examine the value of large margins for classification in the online setting with a drifting target. We derive worst-case loss bounds, and moreover, we show the convergence of the hypothesis to the minimizer of the regularized risk functional. We present some experimental results that support the theory as well as illustrating the power of the new algorithms for online novelty detection.},
author = {Kivinen, Jyrki and Smola, Alexander J. and Williamson, Robert C.},
journal = {IEEE Transactions on Signal Processing},
number = {8},
pages = {2165--2176},
title = {{Online learning with kernels}},
volume = {52},
year = {2004}
}
@inproceedings{sutton1996generalization,
author = {Sutton, Richard},
booktitle = {Advances in Neural Information Processing Systems 8},
pages = {1038--1044},
title = {{Generalization in Reinforcement Learning: Successful Examples Using Sparse Coarse Coding}},
year = {1996}
}
@inproceedings{grill2015black-box,
abstract = {We study the problem of black-box optimization of a function f of any dimension, given function evaluations perturbed by noise. The function is assumed to be locally smooth around one of its global optima, but this smoothness is unknown. Our contribution is an adaptive optimization algorithm, POO or parallel optimistic optimization, that is able to deal with this setting. POO performs almost as well as the best known algorithms requiring the knowledge of the smoothness. Furthermore, POO works for a larger class of functions than what was previously considered, especially for functions that are difficult to optimize, in a very precise sense. We provide a finite-time analysis of POO's performance, which shows that its error after n evaluations is at most a factor of sqrt(ln n) away from the error of the best known optimization algorithms using the knowledge of the smoothness.},
author = {Grill, Jean-Bastien and Valko, Michal and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Grill, Valko, Munos - 2015 - Black-box optimization of noisy functions with unknown smoothness.pdf:pdf},
title = {{Black-box optimization of noisy functions with unknown smoothness}},
year = {2015}
}
@article{montague,
author = {Montague, P R and Dayan, P and Person, C and Sejnowski, T J},
journal = {Nature},
pages = {725--728},
title = {{Bee foraging in uncertain environments using predictive Hebbian learning}},
volume = {377},
year = {1995}
}
@article{spielman_nearly_2014,
author = {Spielman, Daniel A and Teng, Shang-Hua},
journal = {SIAM Journal on Matrix Analysis and Applications},
number = {3},
pages = {835--885},
title = {{Nearly linear time algorithms for preconditioning and solving symmetric, diagonally dominant linear systems}},
url = {http://epubs.siam.org/doi/abs/10.1137/090771430},
volume = {35},
year = {2014}
}
@article{karypis1999fast,
author = {Karypis, G and Kumar, V},
journal = {SIAM Journal on Scientific Computing},
pages = {359--392},
title = {{A fast and high quality multilevel scheme for partitioning irregular graphs}},
volume = {20},
year = {1999}
}
@article{minoux1978accelerated,
author = {Minoux, M},
journal = {Optimization Techniques},
pages = {234--243},
publisher = {Springer},
title = {{Accelerated greedy algorithms for maximizing submodular set functions}},
year = {1978}
}
@article{fujishige1980lexicographically,
author = {Fujishige, Satoru},
journal = {Mathematics of Operations Research},
number = {2},
pages = {186--196},
publisher = {JSTOR},
title = {{Lexicographically optimal base of a polymatroid with respect to a weight vector}},
volume = {5},
year = {1980}
}
@article{Spa97,
author = {Spall, J},
journal = {Automatica},
number = {1},
pages = {109--112},
title = {{A one-measurement form of simultaneous perturbation stochastic approximation}},
volume = {33},
year = {1997}
}
@book{kohn2000to,
address = {Washington DC},
author = {Kohn, L and Corrigan, J and Donaldson, M},
keywords = {imported},
publisher = {National Academy Press},
title = {{To Err Is Human: Building a Safer Health System}},
year = {2000}
}
@article{bubeck2011x,
abstract = {We consider a generalization of stochastic bandits where the set of arms, cX, is allowed to be a generic measurable space and the mean-payoff function is "locally Lipschitz" with respect to a dissimilarity function that is known to the decision maker. Under this condition we construct an arm selection policy, called HOO (hierarchical optimistic optimization), with improved regret bounds compared to previous results for a large class of problems. In particular, our results imply that if cX is the unit hypercube in a Euclidean space and the mean-payoff function has a finite number of global maxima around which the behavior of the function is locally continuous with a known smoothness degree, then the expected regret of HOO is bounded up to a logarithmic factor by sqrtn, i.e., the rate of growth of the regret is independent of the dimension of the space. We also prove the minimax optimality of our algorithm when the dissimilarity is a metric. Our basic strategy has quadratic computational complexity as a function of the number of time steps and does not rely on the doubling trick. We also introduce a modified strategy, which relies on the doubling trick but runs in linearithmic time. Both results are improvements with respect to previous approaches.},
author = {Bubeck, S{\'{e}}bastien and Munos, R{\'{e}}mi and Stoltz, Gilles and Szepesv{\'{a}}ri, Csaba},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Bubeck et al. - 2011 - X-armed bandits.pdf:pdf},
journal = {Journal of Machine Learning Research},
pages = {1587--1627},
title = {{X-armed Bandits}},
volume = {12},
year = {2011}
}
@inproceedings{weinberger2008fast,
address = {New York, NY, USA},
annote = {comps{\_}distance},
author = {Weinberger, Kilian Q and Saul, Lawrence K},
booktitle = {ICML '08: Proceedings of the 25th international conference on Machine learning},
doi = {http://doi.acm.org/10.1145/1390156.1390302},
isbn = {978-1-60558-205-4},
pages = {1160--1167},
publisher = {ACM},
title = {{Fast solvers and efficient implementations for distance metric learning}},
year = {2008}
}
@inproceedings{gabillon2012best,
author = {Gabillon, Victor and Ghavamzadeh, Mohammad and Lazaric, Alessandro},
booktitle = {Neural Information Processing Systems},
title = {{Best arm identification: A unified approach to fixed budget and fixed confidence}},
year = {2012}
}
@article{harisson1978,
author = {Harrison, D and Rubinfeld, D L},
journal = {Journal of Environmental Economics and Management},
pages = {81--102},
title = {{Hedonic prices and the demand for clean air}},
volume = {5},
year = {1978}
}
@book{Bar02,
author = {Barvinok, A},
publisher = {American Mathematical Society},
title = {{A Course in Convexity}},
year = {2002}
}
@article{fine1997prediction,
author = {Fine, M J and Auble, T E and Yealy, D M and Hanusa, B H and Weissfeld, L A and Singer, D E and Coley, C M and Marrie, T J and Kapoor, W N},
journal = {New England Journal of Medicine},
number = {4},
pages = {243--250},
title = {{A Prediction Rule to Identify Low-Risk Patients with Community-Acquired Pneumonia}},
volume = {336},
year = {1997}
}
@article{krause11submodularity,
author = {Krause, Andreas and Guestrin, Carlos},
journal = {ACM Transactions on Intelligent Systems and Technology},
number = {4},
title = {{Submodularity and its Applications in Optimized Information Gathering}},
volume = {2},
year = {2011}
}
@book{winograd72understanding,
address = {Orlando, FL, USA},
author = {Winograd, Terry},
isbn = {0127597506},
publisher = {Academic Press, Inc.},
title = {{Understanding Natural Language}},
year = {1972}
}
@inproceedings{kveton2010semi--supervised,
abstract = {This paper proposes a novel algorithm for semisupervised learning. This algorithm learns graph cuts that maximize the margin with respect to the labels induced by the harmonic function solution. We motivate the approach, compare it to existing work, and prove a bound on its generalization error. The quality of our solutions is evaluated on a synthetic problem and three UCI ML repository datasets. In most cases, we outperform manifold regularization of support vector machines, which is a state-of-the-art approach to semi-supervised max-margin learning.},
author = {Kveton, Branislav and Valko, Michal and Rahimi, Ali and Huang, Ling},
booktitle = {Proceedings of The Thirteenth International Conference on Artificial Intelligence and Statistics (AISTATS)},
editor = {Teh, Y W and Titterington, M},
keywords = {misovalko},
mendeley-tags = {misovalko},
number = {W{\&}CP 9},
pages = {421--428},
title = {{Semi-Supervised Learning with Max-Margin Graph Cuts}},
volume = {9},
year = {2010}
}
@article{REGK14,
author = {de Rooij, Steven and van Erven, Tim and Gr{\"{u}}nwald, Peter D and Koolen, Wouter M},
journal = {Journal of Machine Learning Research},
pages = {1281--1316},
volume = {15},
title = {{Follow the Leader If You Can, Hedge If You Must}},
year = {2014}
}
@article{chandola2009anomaly,
address = {New York, NY, USA},
author = {Chandola, Varun and Banerjee, Arindam and Kumar, Vipin},
doi = {10.1145/1541880.1541882},
issn = {0360-0300},
journal = {ACM Comput. Surv.},
keywords = {Anomaly detection,outlier detection},
month = jul,
number = {3},
pages = {15:1--15:58},
publisher = {ACM},
title = {{Anomaly detection: A survey}},
volume = {41},
year = {2009}
}
@inproceedings{narasimhan2015learnability,
author = {Narasimhan, Harikrishna and Parkes, David C. and Singer, Yaron},
booktitle = {Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Narasimhan, Parkes, Singer - 2015 - Learnability of influence in networks.pdf:pdf},
title = {{Learnability of influence in networks}},
year = {2015}
}
@inproceedings{auerlogarithmic,
author = {Auer, Peter and Ortner, Ronald},
booktitle = {Neural Information Processing Systems},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {49--56},
title = {{Logarithmic online regret bounds for undiscounted reinforcement learning}},
year = {2006}
}
@book{scholkopf2001learning,
author = {Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J.},
publisher = {MIT Press},
title = {{Learning with kernels: Support vector machines, regularization, optimization, and beyond}},
year = {2001}
}
@techreport{weissman03ineq,
author = {Weissman, Tsachy and Ordentlich, Erik and Seroussi, Gadiel and Verdu, Sergio and Weinberger, Marcelo J},
institution = {HP Laboratories},
title = {{Inequalities for the L1 Deviation of the Empirical Distribution}},
year = {2003}
}
@article{narasimhan2006q,
author = {Narasimhan, M and Jojic, N and Bilmes, J},
journal = {Adv. NIPS},
title = {{Q-clustering}},
volume = {18},
year = {2006}
}
@inproceedings{devroye2013random,
author = {Devroye, Luc and Lugosi, G{\'{a}}bor and Neu, Gergely},
booktitle = {Conference on Learning Theory},
title = {{Prediction by random-walk perturbation}},
year = {2013}
}
@article{agarwal2011stochastic,
abstract = {This paper addresses the problem of minimizing a convex, Lipschitz function f over a convex, compact set xset under a stochastic bandit feedback model. In this model, the algorithm is allowed to observed noisy realizations of the function value f(x) at any query point x in xset. The quantity of interest is regret of the algorithm, which is the sum of the function values at algorithm's query points minus the optimal function value. We demonstrate a generalization of the ellipsoid algorithm that incurs otil(poly(d)sqrtT) regret. Since any algorithm has regret at least Omega(sqrtT) on this problem, our algorithm is optimal in terms of the scaling with T.},
author = {Agarwal, Alekh and Foster, Dean P and Hsu, Daniel and Kakade, Sham M and Rakhlin, Alexander},
journal = {Statistics},
pages = {1--24},
title = {{Stochastic convex optimization with bandit feedback}},
url = {http://arxiv.org/abs/1107.1744},
year = {2011}
}
@inproceedings{klein2013cascaded,
abstract = {This paper considers the Inverse Reinforcement Learning (IRL) problem, that is inferring a reward function for which a demonstrated expert policy is optimal. We propose to break the IRL problem down into two generic Supervised Learning steps: this is the Cascaded Supervised IRL (CSI) approach. A classification step that defines a score function is followed by a regression step providing a reward function. A theoretical analysis shows that the demonstrated expert policy is nearoptimal for the computed reward function. Not needing to repeatedly solve a Markov Decision Process (MDP) and the ability to leverage existing techniques for classification and regression are two important advantages of the CSI approach. It is furthermore empirically demonstrated to compare positively to state-of-the-art approaches when using only transitions sampled according to the expert policy, up to the use of some heuristics. This is exemplified on two classical benchmarks (the mountain car problem and a highway driving simulator).},
address = {Prague (Czech Republic)},
author = {Klein, Edouard and PIOT, Bilal and Geist, Matthieu and Pietquin, Olivier},
booktitle = {Proceedings of the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases (ECML/PKDD 2013)},
doi = {10.1007/978-3-642-40988-2_1},
editor = {Blockeel, Hendrik and Kersting, Kristian and Nijssen, Siegfried and Zelezny, Filip},
isbn = {978-3-642-40987-5},
month = sep,
pages = {1--16},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{A cascaded supervised learning approach to inverse reinforcement learning}},
url = {http://www.ecmlpkdd2013.org/wp-content/uploads/2013/07/327.pdf},
volume = {8188},
year = {2013}
}
@article{turner2010fast,
abstract = {We present methods to do fast online anomaly detection using scan statistics. Scan statistics have long been used to detect statistically significant bursts of events. We extend the scan statistics framework to handle many practical issues that occur in application: dealing with an unknown background rate of events, allowing for slow natural changes in background frequency, the inverse problem of finding an unusual lack of events, and setting the test parameters to maximize power. We demonstrate its use on real and synthetic data sets with comparison to other methods.},
author = {Turner, Ryan and Ghahramani, Zoubin and Bottone, Steven},
doi = {10.1109/MLSP.2010.5589151},
isbn = {978-1-4244-7876-7},
issn = {1551-2541},
journal = {IEEE Workshop on Machine Learning for Signal Processing},
keywords = {scan statistics},
mendeley-tags = {scan statistics},
title = {{Fast online anomaly detection using scan statistics}},
year = {2010}
}
@inproceedings{erraqabi2017trading,
abstract = {In multi-armed bandits, the most common objective is the maximization of the cumulative reward. Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.},
author = {Erraqabi, Akram and Lazaric, Alessandro and Valko, Michal and Brunskill, Emma and Liu, Yun-en},
booktitle = {International Conference on Artificial Intelligence and Statistics},
file = {:Users/miki/Dropbox/research/bandits/educational/2016-AISTATS-discovery-bandit/erraqabi2017trading.pdf:pdf},
title = {{Trading off rewards and errors in multi-armed bandits}},
year = {2017}
}
@inproceedings{valizadegan2007kernel,
author = {Valizadegan, Hamed and Tan, Pang-Ning},
booktitle = {Proceedings of the Seventh SIAM International Conference on Data Mining, April 26-28, 2007, Minneapolis, Minnesota, USA},
title = {{Kernel Based Detection of Mislabeled Training Examples}},
year = {2007}
}
@article{kveton2016learning,
abstract = {Many important optimization problems, such as the minimum spanning tree and minimum-cost flow, can be solved optimally by a greedy method. In this work, we study a learning variant of these problems, where the model of the problem is unknown and has to be learned by interacting repeatedly with the environment in the bandit setting. We formalize our learning problem quite generally, as learning how to maximize an unknown modular function on a known polymatroid. We propose a computationally efficient algorithm for solving our problem and bound its expected cumulative regret. Our gap-dependent upper bound is tight up to a constant and our gap-free upper bound is tight up to polylogarithmic factors. Finally, we evaluate our method on three problems and demonstrate that it is practical.},
author = {Kveton, Branislav and Wen, Zheng and Ashkan, Azin and Valko, Michal},
journal = {Journal of Machine Learning Research},
title = {{Learning to act greedily: Polymatroid semi-bandits}},
year = {2016}
}
@inproceedings{valko2013stochastic,
abstract = {We study the problem of global maximization of a function f given a finite number of evaluations perturbed by noise. We consider a very weak assumption on the function, namely that it is locally smooth (in some precise sense) with respect to some semi-metric, around one of its global maxima. Compared to previous works on bandits in general spaces (Kleinberg et al., 2008; Bubeck et al., 2011a) our algorithm does not require the knowledge of this semi-metric. Our algorithm, StoSOO, follows an optimistic strategy to iteratively construct upper confidence bounds over the hierarchical partitions of the function domain to decide which point to sample next. A finite-time analysis of StoSOO shows that it performs almost as well as the best specifically-tuned algorithms even though the local smoothness of the function is not known.},
author = {Valko, Michal and Carpentier, Alexandra and Munos, R{\'{e}}mi},
booktitle = {International Conference on Machine Learning},
title = {{Stochastic simultaneous optimistic optimization}},
year = {2013}
}
@phdthesis{zhu2005semi-supervised,
address = {Pittsburgh, PA, USA},
annote = {AAI3179046},
author = {Zhu, Xiaojin},
isbn = {0-542-19059-1},
publisher = {Carnegie Mellon University},
school = {Carnegie Mellon University},
title = {{Semi-supervised learning with graphs}},
year = {2005}
}
@article{song2007conditional,
address = {Piscataway, NJ, USA},
annote = {Fellow-Sanjay Ranka
comps{\_}ano},
author = {Song, Xiuyao and Wu, Mingxi and Jermaine, Christopher},
doi = {10.1109/TKDE.2007.1009},
issn = {1041-4347},
journal = {IEEE Transactions on Knowledge and Data Engineering},
number = {5},
pages = {631--645},
publisher = {IEEE Educational Activities Department},
title = {{Conditional Anomaly Detection}},
volume = {19},
year = {2007}
}
@inproceedings{MB04,
author = {McMahan, H Brendan and Blum, Avrim},
booktitle = {Proceedings of the 17th Annual Conference on Learning Theory (COLT)},
pages = {109--123},
title = {{Online geometric optimization in the bandit setting against an adaptive adversary}},
year = {2004}
}
@inproceedings{BLLRS11,
author = {Beygelzimer, Alina and Langford, John and Li, Lihong and Reyzin, Lev and Schapire, Robert E},
booktitle = {International Conference on Artificial Intelligence and Statistics},
pages = {19--26},
title = {{Contextual Bandit Algorithms with Supervised Learning Guarantees}},
year = {2011}
}
@inproceedings{auer1995gambling,
author = {Auer, Peter and Cesa-Bianchi, Nicol{\`{o}} and Freund, Yoav and Schapire, Robert},
booktitle = {Proceedings of the 36th Annual Symposium on Foundations of Computer Science},
pages = {322--331},
title = {{Gambling in a Rigged Casino: The Adversarial Multi-Armed Bandit problem}},
year = {1995}
}
@incollection{mccormick2006submodular,
author = {McCormick, S. Thomas},
booktitle = {Handbook on Discrete Optimization},
chapter = {Submodular},
editor = {Nemhauser, G. and Aardal, K. and Weismantel, R.},
pages = {321--391},
publisher = {Elsevier},
title = {{Submodular function minimization}},
year = {2006}
}
@article{farias2003linear,
author = {de Farias, Daniela Pucci and {Van Roy}, Benjamin},
journal = {Mathematics of Operations Research},
number = {3},
pages = {462--478},
title = {{On Constraint Sampling for the Linear Programming Approach to Approximate Dynamic Programming}},
volume = {29},
year = {2004}
}
@article{iwata2001combinatorial,
author = {Iwata, S and Fleischer, L and Fujishige, Satoru},
journal = {Journal of the ACM},
number = {4},
pages = {761--777},
publisher = {ACM},
title = {{A combinatorial strongly polynomial algorithm for minimizing submodular functions}},
volume = {48},
year = {2001}
}
@inproceedings{GW98,
author = {Gentile, C and Warmuth, M},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
pages = {225--231},
title = {{Linear hinge loss and average margin}},
year = {1998}
}
@inproceedings{yu09ArbitraryRewardsTransitions,
address = {Piscataway, NJ, USA},
author = {Yu, J Y and Mannor, S},
booktitle = {GameNets'09: Proceedings of the First ICST International Conference on Game Theory for Networks},
isbn = {978-1-4244-4176-1},
pages = {314--322},
publisher = {IEEE Press},
title = {{Online learning in Markov decision processes with arbitrarily changing rewards and transitions}},
year = {2009}
}
@inproceedings{rousseau2013graph,
author = {Rousseau, Fran{\c{c}}ois and Vazirgiannis, Michalis},
booktitle = {Proceedings of the 22nd ACM international conference on Conference on information {\&} knowledge management},
organization = {ACM},
pages = {59--68},
title = {{Graph-of-word and TW-IDF: new approach to ad hoc IR}},
year = {2013}
}
@book{manning99foundations,
address = {Cambridge, Massachusetts},
author = {Manning, Christopher D and Sch{\"{u}}tze, Hinrich},
publisher = {The {MIT} Press},
title = {{Foundations of Statistical Natural Language Processing}},
url = {citeseer.ist.psu.edu/635422.html},
year = {1999}
}
@inproceedings{narasimhan2004pac,
author = {Narasimhan, M and Bilmes, J},
booktitle = {Proc. UAI},
title = {{PAC-learning bounded tree-width graphical models}},
year = {2004}
}
@inproceedings{valko2008distance,
abstract = {Anomaly detection methods can be very useful in identifying unusual or interesting patterns in data. A recently proposed conditional anomaly detection framework extends anomaly detection to the problem of identifying anomalous patterns on a subset of attributes in the data. The anomaly always depends (is conditioned) on the value of remaining attributes. The work presented in this paper focuses on instance-based methods for detecting conditional anomalies. The methods depend heavily on the distance metric that lets us identify examples in the dataset that are most critical for detecting the anomaly. To optimize the performance of the anomaly detection methods we explore and study metric learning methods. We evaluate the quality of our methods on the Pneumonia PORT dataset by detecting unusual admission decisions for patients with the community-acquired pneumonia. The results of our metric learning methods show an improved detection performance over standard distance metrics, which is very promising for building automated anomaly detection systems for variety of intelligent monitoring applications.},
annote = {From Duplicate 1 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos )
From Duplicate 2 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos )
comps{\_}distances
From Duplicate 2 ( Distance Metric Learning for Conditional Anomaly Detection - Valko, Michal; Hauskrecht, Milos )
comps{\_}distances},
author = {Valko, Michal and Hauskrecht, Milos},
booktitle = {Twenty-First International Florida Artificial Intelligence Research Society Conference},
keywords = {misovalko},
mendeley-tags = {misovalko},
publisher = {AAAI Press},
title = {{Distance metric learning for conditional anomaly detection}},
year = {2008}
}
@inproceedings{calandriello2017distributed,
abstract = {Most kernel-based methods, such as kernel regression, kernel PCA, ICA, or k-means clustering, do not scale to large datasets, because constructing and storing the kernel matrix Kn requires at least O(n2) time and space for n samples. Recent works (Alaoui 2014, Musco 2016) show that sampling points with replacement according to their ridge leverage scores (RLS) generates small dictionaries of relevant points with strong spectral approximation guarantees for Kn. The drawback of RLS-based methods is that computing exact RLS requires constructing and storing the whole kernel matrix. In this paper, we introduce SQUEAK, a new algorithm for kernel approximation based on RLS sampling that sequentially processes the dataset, storing a dictionary which creates accurate kernel matrix approximations with a number of points that only depends on the effective dimension deffgamma of the dataset. Moreover since all the RLS estimations are efficiently performed using only the small dictionary, SQUEAK never constructs the whole matrix kermatrixn, runs in linear time widetildeO(ndeffgamma3) w.r.t.n, and requires only a single pass over the dataset. We also propose a parallel and distributed version of SQUEAK achieving similar accuracy in as little as widetildeO(log(n)deffgamma3) time.},
author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal},
booktitle = {International Conference on Artificial Intelligence and Statistics},
file = {:Users/miki/Dropbox/research/daniele{\_}research/2016/SUB{\_}2016{\_}aistat{\_}parallel{\_}kernel{\_}rls/camera{\_}ready/parallel{\_}kernel{\_}rls.pdf:pdf},
title = {{Distributed adaptive sampling for kernel matrix approximation}},
year = {2017}
}
@inproceedings{toby,
author = {Hocking, T and Joulin, A and Bach, F and Vert, J.-P.},
booktitle = {Proc. ICML},
title = {{Clusterpath: an Algorithm for Clustering using Convex Fusion Penalties}},
year = {2011}
}
@inproceedings{streeter2008online,
author = {Streeter, Matthew J. and Golovin, Daniel},
booktitle = {NIPS},
pages = {1577--1584},
title = {{An Online Algorithm for Maximizing Submodular Functions}},
year = {2008}
}
@inproceedings{WKH11,
author = {Warmuth, M and Koolen, W and Helmbold, D},
booktitle = {Proceedings of the 22nd International Conference on Algorithmic Learning Theory (ALT)},
title = {{Combining initial segments of lists}},
year = {2011}
}
@article{kelner_spectral_2013,
author = {Kelner, Jonathan A and Levin, Alex},
journal = {Theory of Computing Systems},
number = {2},
pages = {243--262},
title = {{Spectral sparsification in the semi-streaming setting}},
volume = {53},
year = {2013}
}
@article{lee2010spectral,
author = {Lee, Ann B and Wasserman, Larry},
doi = {10.1198/jasa.2010.tm09754},
journal = {Journal of the American Statistical Association},
number = {491},
pages = {1241--1255},
title = {{Spectral Connectivity Analysis}},
url = {http://pubs.amstat.org/doi/abs/10.1198/jasa.2010.tm09754},
volume = {105},
year = {2010}
}
@inproceedings{cesa-bianchi2013online,
author = {Cesa-Bianchi, Nicol{\`{o}} and Dekel, Ofer and Shamir, Ohad},
booktitle = {Advances in Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Cesa-Bianchi, Dekel, Shamir - 2013 - Online Learning with Switching Costs and Other Adaptive Adversaries.pdf:pdf},
pages = {1160--1168},
title = {{Online Learning with Switching Costs and Other Adaptive Adversaries}},
url = {http://papers.nips.cc/paper/5151-online-learning-with-switching-costs-and-other-adaptive-adversaries},
year = {2013}
}
@article{rusmevichientong2010linearly,
address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
author = {Rusmevichientong, Paat and Tsitsiklis, John N},
journal = {Math. Oper. Res.},
keywords = {bandits},
mendeley-tags = {bandits},
month = may,
number = {2},
pages = {395--411},
publisher = {Informs},
title = {{Linearly Parameterized Bandits}},
volume = {35},
year = {2010}
}
@inproceedings{smola2003kernels,
author = {Smola, A J and Kondor, R},
booktitle = {Proceedings of the Annual Conference on Computational Learning Theory and Kernel Workshop},
editor = {Sch{\"{o}}lkopf, B and Warmuth, M},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Kernels and Regularization on Graphs}},
year = {2003}
}
@article{drineas2011fast,
abstract = {The statistical leverage scores of a matrix {\$}A{\$} are the squared row-norms of the matrix containing its (top) left singular vectors and the coherence is the largest leverage score. These quantities are of interest in recently-popular problems such as matrix completion and Nystr$\backslash$"{\{}o{\}}m-based low-rank matrix approximation as well as in large-scale statistical data analysis applications more generally; moreover, they are of interest since they define the key structural nonuniformity that must be dealt with in developing fast randomized matrix algorithms. Our main result is a randomized algorithm that takes as input an arbitrary {\$}n \backslashtimes d{\$} matrix {\$}A{\$}, with {\$}n \backslashgg d{\$}, and that returns as output relative-error approximations to all {\$}n{\$} of the statistical leverage scores. The proposed algorithm runs (under assumptions on the precise values of {\$}n{\$} and {\$}d{\$}) in {\$}O(n d \backslashlog n){\$} time, as opposed to the {\$}O(nd{\^{}}2){\$} time required by the na$\backslash$"{\{}i{\}}ve algorithm that involves computing an orthogonal basis for the range of {\$}A{\$}. Our analysis may be viewed in terms of computing a relative-error approximation to an underconstrained least-squares approximation problem, or, relatedly, it may be viewed as an application of Johnson-Lindenstrauss type ideas. Several practically-important extensions of our basic result are also described, including the approximation of so-called cross-leverage scores, the extension of these ideas to matrices with {\$}n \backslashapprox d{\$}, and the extension to streaming environments.},
author = {Drineas, Petros and Magdon-Ismail, Malik and Mahoney, Michael W and Woodruff, David P.},
journal = {Journal of Machine Learning Research},
pages = {3475--3506},
volume = {13},
keywords = {matrix coherence,randomized algorithm,statistical leverage},
title = {{Fast approximation of matrix coherence and statistical leverage}},
year = {2012}
}
@inproceedings{even-dar02pacbounds,
author = {Even-dar, Eyal and Mannor, Shie and Mansour, Yishay},
booktitle = {Fifteenth Annual Conference on Computational Learning Theory (COLT)},
pages = {255--270},
title = {{PAC bounds for multi-armed bandit and Markov decision processes}},
year = {2002}
}
@inproceedings{yang2007bayesian,
annote = {comps{\_}distancX},
author = {Yang, Liu and Jin, Rong and Sukthankar, Rahul},
booktitle = {Proceedings of Uncertainty in AI},
title = {{Bayesian Active Distance Metric Learning}},
url = {http://www.cs.cmu.edu/{~}rahuls/pub/uai2007-rahuls.pdf},
year = {2007}
}
@inproceedings{aggarwal2003framework,
author = {Aggarwal, Charu C and Han, Jiawei and Wang, Jianyong and Yu, Philip S},
booktitle = {Proceedings of the 29th international conference on Very large data bases - Volume 29},
isbn = {0-12-722442-4},
pages = {81--92},
publisher = {VLDB Endowment},
series = {VLDB '2003},
title = {{A framework for clustering evolving data streams}},
url = {http://portal.acm.org/citation.cfm?id=1315451.1315460},
year = {2003}
}
@inproceedings{Jenatton2011,
author = {Jenatton, R and Gramfort, A and Michel, V and Obozinski, G and Bach, F and Thirion, B},
booktitle = {International Workshop on Pattern Recognition in Neuroimaging (PRNI)},
title = {{Multi-scale Mining of {fMRI} Data with Hierarchical Structured Sparsity}},
year = {2011}
}
@article{dekel13switching,
author = {Dekel, Ofer and Ding, Jian and Koren, Tomer and Peres, Yuval},
journal = {CoRR},
title = {{Bandits with Switching Costs: {$T^{2/3}$} Regret}},
volume = {abs/1310.2997},
year = {2013}
}
@inproceedings{abernethy2008efficient,
abstract = {We introduce an efficient algorithm for the problem of online linear optimization in the bandit setting which achieves the optimal O (T regret. The setting is a natural generalization of the non-stochastic multi-armed bandit problem, and the existence of an efficient optimal algorithm has been posed as an open problem in a number of recent papers. We show how the difficulties encountered by previous approaches are overcome by the use of a self-concordant potential function. Our approach presents a novel connection between online learning and interior point methods.},
author = {Abernethy, Jacob Duncan and Hazan, Elad and Rakhlin, Alexander},
booktitle = {Conference on Learning Theory},
pages = {263--274},
title = {{Competing in the dark: An efficient algorithm for bandit linear optimization}},
year = {2008}
@article{audibert2014regret,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, G{\'{a}}bor},
journal = {Mathematics of Operations Research},
pages = {31--45},
title = {{Regret in online combinatorial optimization}},
volume = {39},
year = {2014}
}
@article{JNTV05,
author = {Juditsky, A and Nazin, A and Tsybakov, A and Vayatis, N},
journal = {Problems of Information Transmission},
number = {4},
pages = {368--384},
title = {{Recursive Aggregation of Estimators by the Mirror Descent Algorithm with Averaging}},
volume = {41},
year = {2005}
}
@inproceedings{bezdek2002some,
address = {London, UK},
author = {Bezdek, James C and Hathaway, Richard J},
booktitle = {Proceedings of the 2002 AFSS International Conference on Fuzzy Systems. Calcutta: Advances in Soft Computing},
isbn = {3-540-43150-0},
pages = {288--300},
publisher = {Springer-Verlag},
series = {AFSS '02},
title = {{Some Notes on Alternating Optimization}},
url = {http://portal.acm.org/citation.cfm?id=647300.721144},
year = {2002}
}
@article{tropp2015an-introduction,
author = {Tropp, Joel Aaron},
journal = {Foundations and Trends in Machine Learning},
number = {1-2},
pages = {1--230},
title = {{An introduction to matrix concentration inequalities}},
volume = {8},
year = {2015}
}
@inproceedings{erraqabi2016pliable,
abstract = {Rejection sampling is a known technique for sampling from difficult distributions. However, its use is limited due to a high rejection rate. Common adaptive rejection sampling methods either work for very specific distributions or without performance guarantees. In this paper, we present pliable rejection sampling (PRS), a new approach to rejection sampling, where we adapt the sampling envelope using a kernel estimator. Since our method builds on rejection sampling, the samples obtained are i.i.d. and exactly distributed according to f. Another benefit of PRS is that it comes with a guarantee on the number of accepted samples.},
author = {Erraqabi, Akram and Valko, Michal and Carpentier, Alexandra and Maillard, Odalric-Ambrym},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Dropbox/research/bandits/adaptive rejection sampling/ICML16/erraqabi2016pliable.pdf:pdf},
title = {{Pliable rejection sampling}},
year = {2016}
}
@article{williams1992simple,
author = {Williams, Ronald},
journal = {Machine Learning},
number = {3-4},
pages = {229--256},
title = {{Simple Statistical Gradient-Following Algorithms for Connectionist Reinforcement Learning}},
volume = {8},
year = {1992}
}
@inproceedings{Audibert2007,
address = {Berlin, Heidelberg},
author = {Audibert, Jean-Yves and Munos, R{\'{e}}mi and Szepesv{\'{a}}ri, Csaba},
doi = {10.1007/978-3-540-75225-7},
editor = {Hutter, Marcus and Servedio, Rocco A. and Takimoto, Eiji},
isbn = {978-3-540-75224-0},
issn = {0302-9743},
month = oct,
pages = {150--165},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
booktitle = {Algorithmic Learning Theory},
title = {{Tuning bandit algorithms in stochastic environments}},
url = {http://dl.acm.org/citation.cfm?id=1422422.1422442},
volume = {4754},
year = {2007}
}
@article{tesauro1994td-gammon,
author = {Tesauro, Gerald},
journal = {Neural Computation},
number = {2},
pages = {215--219},
title = {{TD-Gammon, a Self-Teaching Backgammon Program, Achieves Master-Level Play}},
volume = {6},
year = {1994}
}
@article{engel2004kernel,
abstract = {We present a nonlinear version of the recursive least squares (RLS) algorithm. Our algorithm performs linear regression in a high-dimensional feature space induced by a Mercer kernel and can therefore be used to recursively construct minimum mean-squared-error solutions to nonlinear least-squares problems that are frequently encountered in signal processing applications. In order to regularize solutions and keep the complexity of the algorithm bounded, we use a sequential sparsification process that admits into the kernel representation a new input sample only if its feature space image cannot be sufficiently well approximated by combining the images of previously admitted samples. This sparsification procedure allows the algorithm to operate online, often in real time. We analyze the behavior of the algorithm, compare its scaling properties to those of support vector machines, and demonstrate its utility in solving two signal processing problems-time-series prediction and channel equalization.},
author = {Engel, Yaakov and Mannor, Shie and Meir, Ron},
journal = {IEEE Transactions on Signal Processing},
number = {8},
pages = {2275--2285},
title = {{The kernel recursive least-squares algorithm}},
volume = {52},
year = {2004}
}
@inproceedings{demiris96imitative,
author = {Demiris, John and Hayes, Gillian},
booktitle = {Proceedings of the 5th European Workshop on Learning Robots},
editor = {Klingspor, Volker},
pages = {9--16},
title = {{Imitative learning mechanisms in robots and humans}},
url = {citeseer.ist.psu.edu/demiris96imitative.html},
year = {1996}
}
@inproceedings{Verbaeten-2003-MisLabeled,
author = {Verbaeten, S and Assche., A V},
booktitle = {Proceeding of 4th International Workshop on Multiple Classifier Systems},
title = {{Ensemble Methods for Noise Elimination in Classification Problems.}},
year = {2003}
}
@book{jordan1999learning,
address = {Cambridge, MA, USA},
editor = {Jordan, Michael I},
isbn = {0-262-60032-3},
publisher = {MIT Press},
title = {{Learning in graphical models}},
year = {1999}
}
@article{markou2003novelty,
address = {Amsterdam, The Netherlands, The Netherlands},
annote = {comps{\_}ano},
author = {Markou, Markos and Singh, Sameer},
doi = {10.1016/j.sigpro.2003.07.019},
issn = {0165-1684},
journal = {Signal Process.},
number = {12},
pages = {2481--2497},
publisher = {Elsevier North-Holland, Inc.},
title = {{Novelty detection: a review, part 1: statistical approaches}},
volume = {83},
year = {2003}
}
@inproceedings{eskin2000detecting,
address = {Seattle},
annote = {comps{\_}ano},
author = {Eskin, Eleazar},
booktitle = {Proc. 17th International Conf. on Machine Learning},
month = apr,
pages = {255--262},
publisher = {Morgan Kaufmann, San Francisco, CA},
title = {{Anomaly Detection over Noisy Data using Learned Probability Distributions}},
url = {http://citeseer.ist.psu.edu/eskin00anomaly.html http://citeseer.comp.nus.edu.sg/542907.html},
year = {2000}
}
@article{lazaric11stoch_adv,
author = {Lazaric, A and Munos, R},
journal = {Journal of Computer and System Sciences (Special issue: Cloud Computing 2011)},
pages = {1516--1537},
title = {{Learning with Stochastic Inputs and Adversarial Outputs}},
number = {5},
volume = {78},
year = {2012}
}
@article{orlin2009faster,
author = {Orlin, James B},
doi = {10.1007/s10107-007-0189-2},
isbn = {1010700701892},
issn = {00255610},
journal = {Mathematical Programming},
number = {2},
pages = {237--251},
publisher = {Springer},
title = {{A faster strongly polynomial time algorithm for submodular function minimization}},
url = {http://www.springerlink.com/index/10.1007/s10107-007-0189-2},
volume = {118},
year = {2009}
}
@article{lovasz1982submodular,
author = {Lov{\'{a}}sz, L},
journal = {Mathematical programming: The state of the art, Bonn},
pages = {235--257},
title = {{Submodular functions and convexity}},
year = {1982}
}
@inproceedings{hazan2009online,
abstract = {We consider an online decision problem over a discrete space in which the loss function is submodular. We give algorithms which are computationally efficient and are Hannan-consistent in both the full information and bandit settings.},
author = {Hazan, Elad and Kale, Satyen},
booktitle = {Advances in Neural Information Processing Systems 22},
editor = {Bengio, Y and Schuurmans, D and Lafferty, J and Williams, C K I and Culotta, A},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Hazan, Kale - 2009 - Beyond Convexity Online Submodular Minimization.pdf:pdf},
pages = {700--708},
publisher = {Citeseer},
title = {{Beyond Convexity: Online Submodular Minimization}},
url = {http://www.satyenkale.com/papers/submodular.pdf http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.157.4365{\&}rep=rep1{\&}type=pdf},
year = {2009}
}
@article{Agr95,
author = {Agrawal, R},
journal = {Advances in Applied Mathematics},
pages = {1054--1078},
title = {{Sample mean based index policies with O(log n) regret for the multi-armed bandit problem}},
volume = {27},
year = {1995}
}
@inproceedings{Pol05,
author = {Poland, Jan},
booktitle = {3rd Symposium on Stochastic Algorithms, Foundations and Applications (SAGA'05)},
pages = {58--69},
title = {{{FPL} analysis for adaptive bandits}},
year = {2005}
}
@inproceedings{gittens2013revisiting,
abstract = {We reconsider randomized algorithms for the low-rank approximation of symmetric positive semi-definite (SPSD) matrices such as Laplacian and kernel matrices that arise in data analysis and machine learning applications. Our main results consist of an empirical evaluation of the performance quality and running time of sampling and projection methods on a diverse suite of SPSD matrices. Our results highlight complementary aspects of sampling versus projection methods based on leverage scores. We complement our empirical results with a suite of worst-case theoretical bounds for both random sampling and random projections methods. These bounds are qualitatively superior to existing bounds---e.g., improved additive-error bounds for the spectral and Frobenius norm errors and relative-error bounds for the trace norm error.},
archivePrefix = {arXiv},
arxivId = {1303.1849},
author = {Gittens, Alex and Mahoney, Michael W},
booktitle = {International Conference on Machine Learning},
eprint = {1303.1849},
keywords = {randomized algorithms,kernel methods,low-rank approximation,numerical linear algebra,Nystr{\"{o}}m approximation},
title = {{Revisiting the Nystr{\"{o}}m method for improved large-scale machine learning}},
year = {2013}
}
@inproceedings{ma2003online,
address = {New York, NY, USA},
author = {Ma, Junshui and Perkins, Simon},
booktitle = {KDD '03: Proceedings of the ninth ACM SIGKDD international conference on Knowledge discovery and data mining},
doi = {http://doi.acm.org/10.1145/956750.956828},
isbn = {1-58113-737-0},
pages = {613--618},
publisher = {ACM},
title = {{Online novelty detection on temporal sequences}},
year = {2003}
}
@phdthesis{valko2011adaptive,
abstract = {We develop graph-based methods for semi-supervised learning based on label propagation on a data similarity graph. When data is abundant or arrive in a stream, the problems of computation and data storage arise for any graph-based method. We propose a fast approximate online algorithm that solves for the harmonic solution on an approximate graph. We show, both empirically and theoretically, that good behavior can be achieved by collapsing nearby points into a set of local representative points that minimize distortion. Moreover, we regularize the harmonic solution to achieve better stability properties. We also present graph-based methods for detecting conditional anomalies and apply them to the identification of unusual clinical actions in hospitals. Our hypothesis is that patient-management actions that are unusual with respect to the past patients may be due to errors and that it is worthwhile to raise an alert if such a condition is encountered. Conditional anomaly detection extends standard unconditional anomaly framework but also faces new problems known as fringe and isolated points. We devise novel nonparametric graph-based methods to tackle these problems. Our methods rely on graph connectivity analysis and soft harmonic solution. Finally, we conduct an extensive human evaluation study of our conditional anomaly methods by 15 experts in critical care.},
author = {Valko, Michal},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = aug,
school = {University of Pittsburgh},
title = {{Adaptive Graph-Based Algorithms for Conditional Anomaly Detection and Semi-Supervised Learning}},
url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2011adaptive.pdf},
year = {2011}
}
@inproceedings{szorenyi2014optimistic,
abstract = {We consider the problem of online planning in a Markov decision process with discounted rewards for any given initial state. We consider the PAC sample com-plexity problem of computing, with probability 1−$\delta$, an �-optimal action using the smallest possible number of calls to the generative model (which provides reward and next-state samples). We design an algorithm, called StOP (for Stochastic-Optimistic Planning), based on the " optimism in the face of uncertainty " princi-ple. StOP can be used in the general setting, requires only a generative model, and enjoys a complexity bound that only depends on the local structure of the MDP.},
author = {Sz{\"{o}}r{\'{e}}nyi, Bal{\'{a}}zs and Kedenburg, Gunnar and Munos, R{\'{e}}mi},
booktitle = {Neural Information Processing Systems},
title = {{Optimistic planning in Markov decision processes using a generative model}},
year = {2014}
}
@book{schrijver2004combinatorial,
author = {Schrijver, A},
publisher = {Springer},
title = {{Combinatorial optimization: Polyhedra and efficiency}},
year = {2004}
}
@inproceedings{das2008algorithms,
author = {Das, A and Kempe, D},
booktitle = {Proceedings of the 40th annual ACM symposium on Theory of computing},
organization = {ACM},
title = {{Algorithms for subset selection in linear regression}},
year = {2008}
}
@inproceedings{Taskar+al:EMNLP04,
author = {Taskar, B and Klein, D and Collins, M and Koller, D and Manning, C},
booktitle = {Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP)},
pages = {1--8},
title = {{Max-Margin Parsing}},
year = {2004}
}
@inproceedings{abe2006outlier,
author = {Abe, Naoki and Zadrozny, Bianca and Langford, John},
booktitle = {Proceedings of the 12th ACM SIGKDD International Conference on Knowledge Discovery and Data Mining},
doi = {10.1145/1150402.1150459},
isbn = {1-59593-339-5},
keywords = {active learning,ensemble method,outlier detection},
pages = {504--509},
title = {{Outlier Detection by Active Learning}},
year = {2006}
}
@article{horst1999dc,
author = {Horst, R and Thoai, N V},
journal = {Journal of Optimization Theory and Applications},
number = {1},
pages = {1--43},
publisher = {Springer},
title = {{DC programming: overview}},
volume = {103},
year = {1999}
}
@article{chen2009similarity,
abstract = {This paper reviews and extends the field of similarity-based classification, presenting new analyses, algorithms, data sets, and a comprehensive set of experimental results for a rich collection of classification problems. Specifically, the generalizability of using similarities as features is analyzed, design goals and methods for weighting nearest-neighbors for similarity-based learning are proposed, and different methods for consistently converting similarities into kernels are compared. Experiments on eight real data sets compare eight approaches and their variants to similarity-based learning.},
author = {Chen, Yihua and Garcia, Eric K and Gupta, Maya R and Rahimi, Ali and Cazzanti, Luca},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Chen et al. - 2009 - Similarity-based Classification Concepts and Algorithms.pdf:pdf},
issn = {15324435},
journal = {Journal of Machine Learning Research},
number = {206},
pages = {747--776},
publisher = {JMLR. org},
title = {{Similarity-based Classification: Concepts and Algorithms}},
url = {http://jmlr.csail.mit.edu/papers/volume10/chen09a/chen09a.pdf},
volume = {10},
year = {2009}
}
@article{Sho67,
annote = {(In Russian)},
author = {Shor, N},
journal = {Kibernetika},
pages = {53--55},
title = {{Generalized gradient descent with application to block programming}},
volume = {3},
year = {1967}
}
@unpublished{Rak09,
author = {Rakhlin, A},
title = {{Lecture Notes on Online Learning}},
year = {2009}
}
@inproceedings{williams2001using,
abstract = {A major problem for kernel-based predictors (such as Support Vector Machines and Gaussian processes) is that the amount of computation required to find the solution scales as O(n ), where n is the number of training examples. We show that an approximation to the eigendecomposition of the Gram matrix can be computed by the Nystr{\"{o}}m method (which is used for the numerical solution of eigenproblems). This is achieved by carrying out an eigendecomposition on a smaller system of size m {\textless} n, and then expanding the results back up to n dimensions. The computational complexity of a predictor using this approximation is O(m n). We report experiments on the USPS and abalone data sets and show that we can set m n without any significant decrease in the accuracy of the solution.},
author = {Williams, Christopher and Seeger, Matthias},
booktitle = {Neural Information Processing Systems},
title = {{Using the Nystr{\"{o}}m method to speed up kernel machines}},
year = {2001}
}
@inproceedings{elliott84gibbs,
author = {Elliott, H and Derin, H and Cristi, R and Geman, D},
booktitle = {Proceeding of the 1984 Int. Conf. Acoust., Speech, Signal Processing, ICASSP'84},
pages = {32.5.1--32.5.4},
title = {{Application of the {Gibbs} distribution to image segmentation}},
year = {1984}
}
@inproceedings{zhu2003semi-supervised,
author = {Zhu, Xiaojin and Ghahramani, Zoubin and Lafferty, John},
booktitle = {Proceedings of the 20th International Conference on Machine Learning},
pages = {912--919},
title = {{Semi-Supervised Learning Using Gaussian Fields and Harmonic Functions}},
year = {2003}
}
@inproceedings{ng2000pegasus:,
author = {Ng, Andrew and Jordan, Michael},
booktitle = {Proceedings of the 16th Conference on Uncertainty in Artificial Intelligence},
pages = {406--415},
title = {{{PEGASUS}: A Policy Search Method for Large {MDPs} and {POMDPs}}},
year = {2000}
}
@article{hager1989updating,
author = {Hager, W W},
journal = {SIAM Review},
number = {2},
pages = {221--239},
publisher = {JSTOR},
title = {{Updating the inverse of a matrix}},
volume = {31},
}
@inproceedings{McMaStre09,
author = {McMahan, H Brendan and Streeter, Matthew},
booktitle = {Proceedings of the 22nd Conference on Learning Theory},
title = {{Tighter Bounds for Multi-Armed Bandits with Expert Advice}},
year = {2009}
}
@article{Jenatton2010b,
author = {Jenatton, R and Mairal, J and Obozinski, G and Bach, F},
journal = {Journal Machine Learning Research},
pages = {2297--2334},
title = {{Proximal Methods for Hierarchical Sparse Coding}},
volume = {12},
year = {2011}
}
@inproceedings{szepes06learning,
author = {Antos, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba and Munos, R{\'{e}}mi},
booktitle = {Proceedings of the 19th Annual Conference on Learning Theory},
doi = {10.1007/11776420_42},
pages = {574--588},
title = {{Learning near-optimal policies with {Bellman}-residual minimization based fitted policy iteration and a single sample path}},
url = {http://www.springerlink.com/content/x85884360133802l/?p=68d26700c320427caf7c027981b37b8f{\&}pi=41},
year = {2006}
}
@inproceedings{bagnell2010efficient,
abstract = {Imitation Learning, while applied successfully on many large real-world problems, is typically addressed as a standard supervised learning problem, where it is assumed the training and testing data are i.i.d. This is not true in imitation learning as the learned policy influences the future test inputs (states) upon which it will be tested. We show that this leads to compounding errors and a regret bound that grows quadratically in the time horizon of the task. We propose two alternative algorithms for imitation learning where training occurs over several episodes of interaction. These two approaches share in common that the learner's policy is slowly modified from executing the expert's policy to the learned policy. We show that this leads to stronger performance guarantees and demonstrate the improved performance on two challenging problems: training a learner to play 1) a 3D racing game (Super Tux Kart) and 2) Mario Bros.; given input images from the games and corresponding actions taken by a human expert and near-optimal planner respectively.},
author = {Bagnell, J Andrew and Ross, St{\'{e}}phane},
booktitle = {Proceedings of the 13th International Conference on Artificial Intelligence and Statistics (AISTATS) 2010},
pages = {661--668},
title = {{Efficient Reductions for Imitation Learning}},
volume = {9},
year = {2010}
}
@book{bertsekas1996neuro-dynamic,
address = {Belmont, MA},
author = {Bertsekas, Dimitri and Tsitsiklis, John},
publisher = {Athena Scientific},
title = {{Neuro-Dynamic Programming}},
year = {1996}
}
@inproceedings{erraqabi2016rewards,
abstract = {In multi-armed bandits, the most common objective is the maximization of the cumulative reward. Alternative settings include active exploration, where a learner tries to gain accurate estimates of the rewards of all arms. While these objectives are contrasting, in many scenarios it is desirable to trade off rewards and errors. For instance, in educational games the designer wants to gather generalizable knowledge about the behavior of the students and teaching strategies (small estimation errors) but, at the same time, the system needs to avoid giving a bad experience to the players, who may leave the system permanently (large reward). In this paper, we formalize this tradeoff and introduce the ForcingBalance algorithm whose performance is provably close to the best possible tradeoff strategy. Finally, we demonstrate on real-world educational data that ForcingBalance returns useful information about the arms without compromising the overall reward.},
author = {Erraqabi, Akram and Lazaric, Alessandro and Valko, Michal and Brunskill, Emma and Liu, Yun-en},
booktitle = {Challenges in Machine Learning: Gaming and Education workshop at Neural Information Processing Systems},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Erraqabi et al. - 2016 - Rewards and errors in multi-arm bandits for interactive education.pdf:pdf},
title = {{Rewards and errors in multi-arm bandits for interactive education}},
year = {2016}
}
@inproceedings{dechter1997mini-buckets:,
author = {Dechter, Rina},
booktitle = {Proceedings of the 15th International Joint Conference on Artificial Intelligence},
pages = {1297--1303},
title = {{Mini-Buckets: A General Scheme for Generating Approximations in Automated Reasoning}},
year = {1997}
}
@article{GyLiLu11Corr,
annote = {From Duplicate 3 (Efficient Tracking of Large Classes of Experts - Gy{\"{o}}rgy, A; Linder, T; Lugosi, G)
Accepted with minor revisions},
author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Linder, Tam{\'{a}}s and Lugosi, G{\'{a}}bor},
journal = {IEEE Transactions on Information Theory},
number = {11},
pages = {6709--6725},
title = {{Efficient Tracking of Large Classes of Experts}},
volume = {58},
year = {2012}
}
@book{dubitzky2007fundamentals,
abstract = {Finding reliable, meaningful patterns in data with high numbers of attributes can be extremely difficult. Feature selection helps us to decide what attributes or combination of attributes are most important for finding these patterns. In this chapter, we study feature selection methods for building classification models from high-throughput genomic (microarray) and proteomic (mass spectrometry) data sets. Thousands of feature candidates must be analyzed, compared and combined in such data sets. We describe the basics of four different approaches used for feature selection and illustrate their effects on an MS cancer proteomic data set. The closing discussion provides assistance in performing an analysis in high-dimensional genomic and proteomic data.},
author = {Dubitzky, W and Granzow, M and Berrar, Dp},
booktitle = {Vasa},
pages = {149--172},
title = {{Fundamentals of data mining in genomics and proteomics}},
year = {2007}
}
@inproceedings{guestrin2003generalizing,
author = {Guestrin, Carlos and Koller, Daphne and Gearhart, Chris and Kanodia, Neal},
booktitle = {Proceedings of the 18th International Joint Conference on Artificial Intelligence},
pages = {1003--1010},
title = {{Generalizing Plans to New Environments in Relational {MDPs}}},
year = {2003}
}
@article{kirkpatrick1983optimization,
author = {Kirkpatrick, S and Gelatt, C D and Vecchi, M P},
journal = {Science},
number = {4598},
pages = {671--680},
title = {{Optimization by Simulated Annealing}},
volume = {220},
year = {1983}
}
@inproceedings{catoni2012challenging,
author = {Catoni, Olivier},
booktitle = {Annales de l'Institut Henri Poincar{\'{e}}, Probabilit{\'{e}}s et Statistiques},
number = {4},
pages = {1148--1185},
title = {{Challenging the empirical mean and empirical variance: A deviation study}},
volume = {48},
year = {2012}
}
@article{warkentin2004heparin-induced,
abstract = {This chapter about the recognition, treatment, and prevention of heparin-induced
thrombocytopenia (HIT) is part of the Seventh ACCP Conference on
Antithrombotic and Thrombolytic Therapy: Evidence Based Guidelines.
Grade 1 recommendations are strong and indicate that the benefits
do, or do not, outweigh risks, burden, and costs. Grade 2 suggests
that individual patients' values may lead to different choices (for
a full understanding of the grading, see Guyatt et al, CHEST 2004;
126:179S-187S). Among the key recommendations in this chapter are
the following: For patients in whom the risk of HIT is considered
to be {\textgreater} 0.1{\%}, we recommend platelet count monitoring (Grade 1C).
For patients who are receiving therapeutic-dose unfractionated heparin
(UFH), we suggest at least every-other-day platelet count monitoring
until day 14, or until UFH is stopped, whichever occurs first (Grade
2C). For patients who are receiving postoperative antithrombotic
prophylaxis with UFH (HIT risk {\textgreater} 1{\%}), we suggest at least every-other-day
platelet count monitoring between postoperative days 4 to 14 (or
until UFH is stopped, whichever occurs first) [Grade 2C]. For medical/obstetric
patients who are receiving prophylactic-dose UFH, postoperative patients
receiving prophylactic-dose low molecular weight heparin (LMWH),
postoperative patients receiving intravascular catheter UFH "flushes,"
or medical/obstetrical patients receiving LMWH after first receiving
UFH (risk, 0.1 to 1{\%}), we suggest platelet count monitoring every
2 days or 3 days from day 4 to day 14, or until heparin is stopped,
whichever occurs first (Grade 2C). For medical/obstetrical patients
who are only receiving LMWH, or medical patients who are receiving
only intravascular catheter UFH flushes (risk {\textless} 0.1{\%}), we suggest
clinicians do not use routine platelet count monitoring (Grade 2C).
For patients with strongly suspected (or confirmed) HIT, whether
or not complicated by thrombosis, we recommend use of an alternative
anticoagulant, such as lepirudin (Grade 1C+), argatroban (Grade 1C),
bivalirudin (Grade 2C), or danaparoid (Grade 1B). For patients with
strongly suspected (or confirmed) HIT, we recommend routine ultrasonography
of the lower-limb veins for investigation of deep venous thrombosis
(Grade 1C); against the use of vitamin K antagonist (VKA) [coumarin]
therapy until after the platelet count has substantially recovered;
that the VKA antagonist be administered only during overlapping alternative
anticoagulation (minimum 5-day overlap); and begun with low, maintenance
doses (all Grade 2C). For patients receiving VKAs at the time of
diagnosis of HIT, we recommend use of vitamin K (Grade 2C) [corrected]
For patients with a history of HIT who are HIT antibody negative
and require cardiac surgery, we recommend use of UFH (Grade 1C).},
author = {Warkentin, Theodore E and Greinacher, Andreas},
doi = {10.1378/chest.126.3_suppl.311S},
institution = {Hamilton Regional Laboratory Medicine Program, Hamilton Health Sciences, General Site, 237 Barton St E, Hamilton, Ontario L8L 2X2, Canada. twarken@mcmaster.ca},
journal = {Chest},
keywords = {Dose-Response Relationship,Drug; Drug Monitoring; Evidence-Based Medicine; F,Low-Molecular-Weight; Humans; Platelet Count; Pre},
month = sep,
number = {3 Suppl},
pages = {311S--337S},
pmid = {15383477},
title = {{Heparin-induced thrombocytopenia: recognition, treatment, and prevention: the Seventh ACCP Conference on Antithrombotic and Thrombolytic Therapy.}},
url = {http://dx.doi.org/10.1378/chest.126.3{\_}suppl.311S},
volume = {126},
year = {2004}
}
@inproceedings{seldin2014prediction,
author = {Seldin, Yevgeny and Bartlett, Peter and Crammer, Koby and Abbasi-Yadkori, Yasin},
booktitle = {International Conference on Machine Learning},
title = {{Prediction with limited advice and multiarmed bandits with paid observations}},
year = {2014}
}
@article{streeter2006simple,
author = {Streeter, Matthew J. and Smith, Stephen F.},
isbn = {3-540-46267-8},
journal = {Principles and Practice of Constraint Programming},
keywords = {dblp},
pages = {560--574},
title = {{A Simple Distribution-Free Approach to the Max k-Armed Bandit Problem.}},
url = {http://dblp.uni-trier.de/db/conf/cp/cp2006.html{\#}StreeterS06},
volume = {4204},
year = {2006}
}
@article{ross2010reduction,
abstract = {Sequential prediction problems such as imitation learning, where future observations depend on previous predictions (actions), violate the common i.i.d. assumptions made in statistical learning. This leads to poor performance in theory and often in practice. Some recent approaches provide stronger guarantees in this setting, but remain somewhat unsatisfactory as they train either non-stationary or stochastic policies and require a large number of iterations. In this paper, we propose a new iterative algorithm, which trains a stationary deterministic policy, that can be seen as a no regret algorithm in an online learning setting. We show that any such no regret algorithm, combined with additional reduction assumptions, must find a policy with good performance under the distribution of observations it induces in such sequential settings. We demonstrate that this new approach outperforms previous approaches on two challenging imitation learning problems and a benchmark sequence labeling problem.},
author = {Ross, Stephane and Gordon, Geoffrey J and Bagnell, J Andrew},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ross, Gordon, Bagnell - 2010 - A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning.pdf:pdf},
journal = {AISTATS},
pages = {627--635},
title = {{A Reduction of Imitation Learning and Structured Prediction to No-Regret Online Learning}},
url = {http://arxiv.org/abs/1011.0686},
volume = {15},
year = {2010}
}
@inproceedings{Vem10,
author = {Vempala, S},
booktitle = {IARCS Annual Conference on Foundations of Software Technology and Theoretical Computer Science (FSTTCS 2010)},
editor = {Lodaya, K and Mahajan, M},
pages = {42--64},
publisher = {Schloss Dagstuhl--Leibniz-Zentrum f{\"{u}}r Informatik},
series = {Leibniz International Proceedings in Informatics (LIPIcs)},
title = {{Recent Progress and Open Problems in Algorithmic Convex Geometry}},
volume = {8},
year = {2010}
}
@incollection{asadpour2008stochastic,
author = {Asadpour, Arash and Nazerzadeh, Hamid and Saberi, Amin},
booktitle = {Internet and Network Economics},
pages = {477--489},
publisher = {Springer},
title = {{Stochastic submodular maximization}},
year = {2008}
}
@article{haupt2006signal,
author = {Haupt, J and Nowak, R},
journal = {IEEE Transactions on Information Theory},
number = {9},
pages = {4036--4048},
publisher = {IEEE},
title = {{Signal reconstruction from noisy random projections}},
volume = {52},
year = {2006}
}
@inproceedings{silva:long,
author = {da Silva, Valdinei Freire and Costa, Anna Helena Reali and Lima, Pedro},
pages = {4246--4251},
title = {{Inverse Reinforcement Learning with Evaluation}}
}
@inproceedings{szummer2001partially,
abstract = {To classify a large number of unlabeled examples we combine a limited
number of labeled examples with a Markov random walk representation
over the unlabeled examples. The random walk representation exploits
any low dimensional structure in the data in a robust, probabilistic
manner. We develop and compare several estimation criteria/algorithms
suited to this representation. This includes in particular multi-way
classification with an average margin criterion which permits a closed
form...},
author = {Szummer, Martin and Jaakkola, Tommi},
booktitle = {Advances in Neural Information Processing Systems},
keywords = {classification,graph,machine-learning},
title = {{Partially labeled classification with Markov random walks}},
volume = {14},
year = {2001}
}
@article{gorur2011concave,
author = {G{\"{o}}r{\"{u}}r, Dilan and Teh, Yee Whye},
journal = {Journal of Computational and Graphical Statistics},
title = {{Concave-Convex adaptive rejection sampling}},
year = {2011}
}
@article{geman1984stochastic,
author = {Geman, Stuart and Geman, Donald},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {6},
pages = {721--741},
title = {{Stochastic Relaxation, {Gibbs} Distribution, and the {Bayesian} Restoration of Images}},
volume = {6},
year = {1984}
}
@article{tropp2011freedman,
author = {Tropp, Joel Aaron},
journal = {Electronic Communications in Probability},
pages = {262--270},
title = {{Freedman's inequality for matrix martingales}},
volume = {16},
year = {2011}
}
@article{zheng2000lazy,
address = {Hingham, MA, USA},
annote = {comps{\_}models},
author = {Zheng, Zijian and Webb, Geoffrey I},
doi = {http://dx.doi.org/10.1023/A:1007613203719},
issn = {0885-6125},
journal = {Mach. Learn.},
number = {1},
pages = {53--84},
publisher = {Kluwer Academic Publishers},
title = {{Lazy Learning of Bayesian Rules}},
volume = {41},
year = {2000}
}
@inproceedings{kocak2016onlinea,
abstract = {We consider adversarial multi-armed bandit problems where the learner is allowed to observe losses of a number of arms beside the arm that it actually chose. We study the case where all non-chosen arms reveal their loss with an unknown probability rt, independently of each other and the action of the learner. Moreover, we allow rt to change in every round t, which rules out the possibility of estimating rt by a well-concentrated sample average. We propose an algorithm which operates under the assumption that rt is large enough to warrant at least one side observation with high probability. We show that after T rounds in a bandit problem with N arms, the expected regret of our algorithm is of order O(sqrt(sum(t=1)T (1/rt) log N )), given that rt less than log T / (2N-2) for all t. All our bounds are within logarithmic factors of the best achievable performance of any algorithm that is even allowed to know exact values of rt.},
author = {Koc{\'{a}}k, Tom{\'{a}}{\v{s}} and Neu, Gergely and Valko, Michal},
booktitle = {Uncertainty in Artificial Intelligence},
title = {{Online learning with Erd{\H{o}}s-R{\'{e}}nyi side-observation graphs}},
year = {2016}
}
@phdthesis{meila1999learning,
author = {Meila, Marina},
school = {Massachusetts Institute of Technology},
title = {{Learning with Mixtures of Trees}},
year = {1999}
}
@article{audibert2010regret,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
month = dec,
pages = {2785--2836},
publisher = {JMLR.org},
title = {{Regret bounds and minimax policies under partial monitoring}},
volume = {11},
year = {2010}
}
@inproceedings{shental2003learning,
annote = {comps{\_}distance},
author = {Shental, Noam and Weinshall, Daphna},
booktitle = {In Proceedings of the Twentieth International Conference on Machine Learning},
pages = {11--18},
title = {{Learning Distance Functions using Equivalence Relations}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.7.8086},
year = {2003}
}
@article{johnson98pcfg,
author = {Johnson, Mark},
journal = {Computational Linguistics},
number = {4},
pages = {613--632},
title = {{{PCFG} Models of Linguistic Tree Representations}},
url = {http://citeseer.ist.psu.edu/johnson98pcfg.html},
volume = {24},
year = {1998}
}
@article{levine2010feature,
author = {Levine, S and Popovic, Z and Koltun, V},
journal = {Advances in Neural Information Processing Systems},
title = {{Feature construction for inverse reinforcement learning}},
url = {http://www.stanford.edu/{~}svlevine/papers/firl.pdf},
volume = {23},
year = {2010}
}
@article{mairal2011b,
author = {Mairal, J and Jenatton, R and Obozinski, G and Bach, F},
journal = {Journal of Machine Learning Research},
pages = {2681--2720},
title = {{Convex and Network Flow Optimization for Structured Sparsity}},
volume = {12},
year = {2011}
}
@techreport{Warmuth97continuousand,
author = {Warmuth, M K and Jagota, A K},
institution = {In Fifth International Symposium on Artificial Intelligence and Mathematics},
title = {{Continuous and discrete-time nonlinear gradient descent: Relative loss bounds and convergence}},
year = {1997}
}
@inproceedings{Kavukcuoglu2009,
author = {Kavukcuoglu, K and Ranzato, M A and Fergus, R and Le-Cun, Y},
booktitle = {Proc. CVPR},
title = {{Learning invariant features through topographic filter maps}},
year = {2009}
}
@inproceedings{mannor2006online,
author = {Mannor, Shie and Tsitsiklis, John},
booktitle = {Proceedings of 19th Annual Conference on Learning Theory},
pages = {529--543},
title = {{Online Learning with Constraints}},
year = {2006}
}
@article{zhang1996nonparametric,
abstract = {Abstract Importance sampling is a widely used variance reduction simulation technique for the evaluation of high-dimensional integrals. A key step in the implementation of importance sampling is to choose a proper distribution function from which pseudorandom numbers are generated. Parametric sampling distributions, if available at all, are often inadequate for high-dimensional integrals over irregular regions. One possible remedy is to use a nonparametric method to estimate the unknown optimal sampling function. We show that the nonparametric approach yields integral estimates that converge faster than estimates obtained from parametric approaches. We also demonstrate that an adaptive method, which has been used successfully in parametric settings, does not yield better results than simple one-step methods in the nonparametric setting.},
author = {Zhang, Ping},
journal = {Journal of the American Statistical Association},
keywords = {Adaptive importance sampling,Integral evaluation,Kernel density estimation,Monte Carlo simulation,Variance reduction},
number = {435},
pages = {1245--1253},
title = {{Nonparametric importance sampling}},
volume = {91},
year = {1996}
}
@inproceedings{bach2013sharp,
abstract = {We consider supervised learning problems within the positive-definite kernel framework, such as kernel ridge regression, kernel logistic regression or the support vector machine. With kernels leading to infinite-dimensional feature spaces, a common practical limiting difficulty is the necessity of computing the kernel matrix, which most frequently leads to algorithms with running time at least quadratic in the number of observations n, i.e., O(n{\^{}}2). Low-rank approximations of the kernel matrix are often considered as they allow the reduction of running time complexities to O(p{\^{}}2 n), where p is the rank of the approximation. The practicality of such methods thus depends on the required rank p. In this paper, we show that in the context of kernel ridge regression, for approximations based on a random subset of columns of the original kernel matrix, the rank p may be chosen to be linear in the degrees of freedom associated with the problem, a quantity which is classically used in the statistical analysis of such methods, and is often seen as the implicit number of parameters of non-parametric estimators. This result enables simple algorithms that have sub-quadratic running time complexity, but provably exhibit the same predictive performance than existing algorithms, for any given problem instance, and not only for worst-case situations.},
author = {Bach, Francis},
booktitle = {Conference on Learning Theory},
title = {{Sharp analysis of low-rank kernel matrix approximations}},
year = {2013}
}
@article{charniak1991bayesian,
address = {Menlo Park, CA, USA},
annote = {comps{\_}models},
author = {Charniak, Eugene},
issn = {0738-4602},
journal = {AI Mag.},
number = {4},
pages = {50--63},
publisher = {American Association for Artificial Intelligence},
title = {{Bayesian networks without tears: making Bayesian networks more accessible to the probabilistically unsophisticated}},
url = {http://www.idi.ntnu.no/emner/it3704/lectures/papers/AIMag12-04-007.pdf},
volume = {12},
year = {1991}
}
@inproceedings{mahadevan2006value,
author = {Mahadevan, Sridhar and Maggioni, Mauro},
booktitle = {Advances in Neural Information Processing Systems 18},
pages = {843--850},
title = {{Value Function Approximation with Diffusion Wavelets and {Laplacian} Eigenfunctions}},
year = {2006}
}
@inproceedings{preux2014bandits,
abstract = {We consider function optimization as a sequential decision making problem under the budget constraint. Such constraint limits the number of objective function evaluations allowed during the optimization. We consider an algorithm inspired by a continuous version of a multi-armed bandit problem which attacks this optimization problem by solving the tradeoff between exploration (initial quasi-uniform search of the domain) and exploitation (local optimization around the potentially global maxima). We introduce the so-called Simultaneous Optimistic Optimization (SOO), a deterministic algorithm that works by domain partitioning. The benefit of such an approach are the guarantees on the returned solution and the numerical eficiency of the algorithm. We present this machine learning rooted approach to optimization, and provide the empirical assessment of SOO on the CEC'2014 competition on single objective real-parameter numerical optimization testsuite.},
author = {Preux, Philippe and Munos, R{\'{e}}mi and Valko, Michal},
booktitle = {Congress on Evolutionary Computation},
title = {{Bandits attack function optimization}},
year = {2014}
}
@book{BaHoSchSmTaVi07,
author = {Bakir, G{\"{u}}khan H and Hofmann, Thomas and Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J and Taskar, Ben and Vishwanathan, S V N},
isbn = {0262026171},
publisher = {The MIT Press},
title = {{Predicting Structured Data (Neural Information Processing)}},
year = {2007}
}
@inproceedings{peters10reps,
author = {Peters, Jan and M{\"{u}}lling, Katharina and Altun, Yasemin},
booktitle = {Proceedings of the Twenty-Fourth AAAI Conference on Artificial Intelligence},
pages = {1607--1612},
title = {{Relative Entropy Policy Search}},
year = {2010}
}
@inproceedings{geulen10buffering,
author = {Geulen, Sascha and V{\"{o}}cking, Berthold and Winkler, Melanie},
booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory},
title = {{Regret Minimization for Online Buffering Problems Using the Weighted Majority Algorithm}},
year = {2010}
}
@techreport{kolmogorov2010minimizing,
author = {Kolmogorov, V},
institution = {Arxiv},
number = {1006.1990},
title = {{Minimizing a sum of submodular functions}},
year = {2010}
}
@techreport{toshev2010submodular,
annote = {Written Preliminary Examination},
author = {Toshev, A},
institution = {University of Pennsylvania},
title = {{Submodular Function Minimization}},
year = {2010}
}
@inproceedings{drugan2013designing,
author = {Drugan, Madalina and Nowe, Ann},
booktitle = {International Joint Conference on Neural Networks},
title = {{Designing multi-objective multi-armed bandits algorithms: a study}},
year = {2013}
}
@inproceedings{audibert2011minimax,
author = {Audibert, Jean-Yves and Bubeck, S{\'{e}}bastien and Lugosi, Gabor},
booktitle = {Proceedings of the 24th annual Conference On Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{Minimax Policies for Combinatorial Prediction Games}},
year = {2011}
}
@book{chapelle2006semi-supervised,
address = {Cambridge, MA},
editor = {Chapelle, O and Sch{\"{o}}lkopf, B and Zien, A},
publisher = {MIT Press},
title = {{Semi-Supervised Learning}},
url = {http://www.kyb.tuebingen.mpg.de/ssl-book},
year = {2006}
}
@phdthesis{osborne2010bayesian,
author = {Osborne, Michael},
school = {University of Oxford},
title = {{Bayesian Gaussian processes for sequential prediction, optimisation and quadrature}},
year = {2010}
}
@article{fowlkes2004spectral,
author = {Fowlkes, Charless and Belongie, Serge and Chung, Fan and Malik, Jitendra},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {2},
title = {{Spectral Grouping Using the Nystr{\"{o}}m Method}},
volume = {26},
year = {2004}
}
@article{Wright2009,
author = {Wright, S J and Nowak, R D and Figueiredo, M A T},
journal = {IEEE Transactions on Signal Processing},
number = {7},
pages = {2479--2493},
title = {{Sparse reconstruction by separable approximation}},
volume = {57},
year = {2009}
}
@inproceedings{calandriello2016analysis,
abstract = {Large-scale kernel ridge regression (KRR) is limited by the need to store a large kernel matrix Kt. To avoid storing the entire matrix Kt, Nyström methods subsample a subset of columns of the kernel matrix, and efficiently find an approximate KRR solution on the reconstructed Kt . The chosen subsampling distribution in turn affects the statistical and computational tradeoffs. For KRR problems, [15, 1] show that a sampling distribution proportional to the ridge leverage scores (RLSs) provides strong reconstruction guarantees for Kt. While exact RLSs are as difficult to compute as a KRR solution, we may be able to approximate them well enough. In this paper, we study KRR problems in a sequential setting and introduce the INK-ESTIMATE algorithm, that incrementally computes the RLSs estimates. INK-ESTIMATE maintains a small sketch of Kt, that at each step is used to compute an intermediate es- timate of the RLSs. First, our sketch update does not require access to previously seen columns, and therefore a single pass over the kernel ma- trix is sufficient. Second, the algorithm requires a fixed, small space budget to run dependent only on the effective dimension of the kernel matrix. Finally, our sketch provides strong approximation guarantees on the distance ∥Kt−Kt∥2 , and on the statistical risk of the approximate KRR solution at any time, because all our guarantees hold at any intermediate step.},
author = {Calandriello, Daniele and Lazaric, Alessandro and Valko, Michal},
booktitle = {Uncertainty in Artificial Intelligence},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Calandriello, Lazaric, Valko - 2016 - Analysis of Nystr{\"{o}}m method with sequential ridge leverage scores.pdf:pdf},
title = {{Analysis of Nystr{\"{o}}m method with sequential ridge leverage scores}},
year = {2016}
}
@techreport{heckerman1995tutorial,
address = {Redmond, Washington},
annote = {Revised June 96},
author = {Heckerman, D},
institution = {Microsoft Research},
title = {{A Tutorial on Learning with Bayesian Networks}},
year = {1995}
}
@inproceedings{gyorgy13nearoptimal,
author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Neu, Gergely},
booktitle = {Submitted to the IEEE Transactions on Information Theory},
title = {{Near-Optimal Rates for Limited-Delay Universal Lossy Source Coding}},
year = {2013}
}
@article{spielman2007spectral,
abstract = {Spectral partitioning methods use the Fiedler vector-the eigenvector of the second-smallest eigenvalue of the Laplacian matrix-to find a small separator of a graph. These methods are important components of many scientific numerical algorithms and have been demonstrated by experiment to work extremely well. In this paper, we show that spectral partitioning methods work well on bounded-degree planar graphs and finite element meshes-the classes of graphs to which they are usually applied. While naive spectral bisection does not necessarily work, we prove that spectral partitioning techniques can be used to produce separators whose ratio of vertices removed to edges cut is O (sqrt(n)) for bounded-degree planar graphs and two-dimensional meshes and O(n1/d) for well-shaped d-dimensional meshes. The heart of our analysis is an upper bound on the second-smallest eigenvalues of the Laplacian matrices of these graphs: we prove a bound of O(1/n) for bounded-degree planar graphs and O(1/n2/d) for well-shaped d-dimensional meshes. ?? 2006 Elsevier Inc. All rights reserved.},
author = {Spielman, Daniel A. and Teng, Shang-Hua},
journal = {Linear Algebra and Its Applications},
keywords = {Eigenvalue problems,Graph embedding,Graph partitioning,Spectral analysis,Spectral methods},
pages = {284--305},
title = {{Spectral partitioning works: Planar graphs and finite element meshes}},
volume = {421},
year = {2007}
}
@article{dreiseitl2002logistic,
address = {San Diego, USA},
author = {Dreiseitl, Stephan and Ohno-Machado, Lucila},
doi = {10.1016/S1532-0464(03)00034-0},
issn = {1532-0464},
journal = {Journal of Biomedical Informatics},
number = {5/6},
pages = {352--359},
publisher = {Elsevier Science},
title = {{Logistic regression and artificial neural network classification models: a methodology review}},
volume = {35},
year = {2002}
}
@inproceedings{singh2008unlabeled,
author = {Singh, Aarti and Nowak, Robert D and Zhu, Xiaojin},
booktitle = {Advances in Neural Information Processing Systems 21},
title = {{Unlabeled data: Now it helps, now it doesn't}},
year = {2008}
}
@inproceedings{lei2015online,
abstract = {Social networks are commonly used for marketing purposes. For example, free samples of a product can be given to a few influential social network users (or "seed nodes"), with the hope that they will convince their friends to buy it. One way to formalize marketers' objective is through influence maximization (or IM), whose goal is to find the best seed nodes to activate under a fixed budget, so that the number of people who get influenced in the end is maximized. Recent solutions to IM rely on the influence probability that a user influences another one. However, this probability information may be unavailable or incomplete. In this paper, we study IM in the absence of complete information on influence probability. We call this problem Online Influence Maximization (OIM) since we learn influence probabilities at the same time we run influence campaigns. To solve OIM, we propose a multiple-trial approach, where (1) some seed nodes are selected based on existing influence information; (2) an influence campaign is started with these seed nodes; and (3) users' feedback is used to update influence information. We adopt the Explore-Exploit strategy, which can select seed nodes using either the current influence probability estimation (exploit), or the confidence bound on the estimation (explore). Any existing IM algorithm can be used in this framework. We also develop an incremental algorithm that can significantly reduce the overhead of handling users' feedback information. Our experiments show that our solution is more effective than traditional IM methods on the partial information.},
author = {Lei, Siyu and Maniu, Silviu and Mo, Luyi and Cheng, Reynold and Senellart, Pierre},
booktitle = {Knowledge Discovery and Data mining},
title = {{Online influence maximization}},
year = {2015}
}
@inproceedings{goldberg2008online,
author = {Goldberg, Andrew and Li, Ming and Zhu, Xiaojin},
booktitle = {Proceeding of European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases},
title = {{Online Manifold Regularization: A New Learning Setting and Empirical Study}},
year = {2008}
}
@article{chopra1989spanning,
author = {Chopra, S},
journal = {Operations Research Letters},
number = {1},
pages = {25--29},
publisher = {Elsevier},
title = {{On the spanning tree polyhedron}},
volume = {8},
year = {1989}
}
@inproceedings{KP11,
author = {Kapralov, Michael and Panigrahy, Rina},
booktitle = {Advances in Neural Information Processing Systems},
pages = {828--836},
title = {{Prediction strategies without loss}},
year = {2011}
}
@article{cunningham1985minimum,
author = {Cunningham, W H},
journal = {Networks},
number = {2},
pages = {205--215},
publisher = {Wiley Online Library},
title = {{Minimum cuts, modular functions, and matroid polyhedra}},
volume = {15},
year = {1985}
}
@article{kalai2003efficient,
author = {Kalai, Adam and Vempala, Santosh},
journal = {Journal of Computer and System Sciences},
number = {3},
pages = {291--307},
title = {{Efficient algorithms for online decision problems}},
volume = {71},
year = {2005}
}
@misc{asuncion2007uci,
author = {Asuncion, A and Newman, D J},
institution = {University of California, Irvine, School of Information and Computer Sciences},
title = {{{UCI} Machine Learning Repository}},
url = {http://www.ics.uci.edu/{~}mlearn/MLRepository.html},
year = {2011}
}
@inproceedings{daniel2012hreps,
author = {Daniel, C and Neumann, G and Peters, J},
booktitle = {Proceedings of the Fifteenth International Conference on Artificial Intelligence and Statistics},
pages = {273--281},
series = {JMLR Workshop and Conference Proceedings},
title = {{Hierarchical Relative Entropy Policy Search}},
volume = {22},
year = {2012}
}
@inproceedings{kleinbergregret,
author = {Kleinberg, Robert D and Niculescu-Mizil, Alexandru and Sharma, Yogeshwer},
booktitle = {Proceedings of the 21st Annual Conference on Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {425--436},
title = {{Regret Bounds for Sleeping Experts and Bandits}},
year = {2008}
}
@inproceedings{hauskrecht2004linear,
author = {Hauskrecht, Milos and Kveton, Branislav},
booktitle = {Advances in Neural Information Processing Systems 16},
pages = {895--902},
title = {{Linear Program Approximations for Factored Continuous-State {Markov} Decision Processes}},
year = {2004}
}
@inproceedings{melo2010analysis,
author = {Melo, F S and Lopes, M and Ferreira, R},
booktitle = {Proceedings of the 2010 conference on ECAI 2010: 19th European Conference on Artificial Intelligence},
organization = {IOS Press},
pages = {349--354},
title = {{Analysis of inverse reinforcement learning with perturbed demonstrations}},
url = {http://flowers.inria.fr/mlopes/myrefs/10-ecai.pdf},
year = {2010}
}
@incollection{chickering1996learning,
author = {Chickering, David M},
booktitle = {Learning from Data: Artificial Intelligence and Statistics V},
editor = {Fisher, D and Lenz, H},
keywords = {complexity,graphical-models},
pages = {121--130},
publisher = {Springer-Verlag},
title = {{Learning {Bayesian} Networks is {NP}-{Complete}}},
url = {http://research.microsoft.com/copyright/accept.asp?path=http://research.microsoft.com/{~}dmax/publications/lns96.pdf{\&}{\#}38;pub=15},
year = {1996}
}
@inproceedings{kaufmann2013information,
author = {Kaufmann, Emilie and Kalyanakrishnan, Shivaram},
booktitle = {Conference on Learning Theory},
title = {{Information complexity in bandit subset selection}},
year = {2013}
}
@inproceedings{guestrin2002context,
author = {Guestrin, Carlos and Venkataraman, Shobha and Koller, Daphne},
booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence},
pages = {253--259},
title = {{Context Specific Multiagent Coordination and Planning with Factored {MDPs}}},
year = {2002}
}
@inproceedings{kulesza2011kdpp,
abstract = {Determinantal point processes (DPPs) have recently been proposed as models for set selection problems where diversity is preferred. For example, they can be used to select diverse sets of sentences to form document summaries, or to find multiple non-overlapping human ...},
author = {Kulesza, Alex and Taskar, Ben},
booktitle = {International Conference on Machine Learning},
isbn = {978-1-4503-0619-5},
keywords = {To Read Urgently},
pages = {1193--1200},
title = {{k-DPPs: Fixed-Size Determinantal Point Processes}},
url = {http://158.130.69.163/{~}taskar/pubs/kdpps{\_}icml11.pdf},
year = {2011}
}
@article{srivastava1996predictive,
author = {Srivastava, Mani and Chandrakasan, Anantha and Brodersen, Robert},
journal = {IEEE Transactions on Very Large Scale Integration Systems},
number = {1},
pages = {42--55},
title = {{Predictive System Shutdown and Other Architectural Techniques for Energy Efficient Programmable Computation}},
volume = {4},
year = {1996}
}
@inproceedings{kolla2016collaborative,
abstract = {We consider a collaborative online learning paradigm, wherein a group of agents connected through a social network are engaged in playing a stochastic multi-armed bandit game. Each time an agent takes an action, the corresponding reward is instantaneously observed by the agent, as well as its neighbours in the social network. We perform a regret analysis of various policies in this collaborative learning setting. A key finding of this paper is that natural extensions of widely-studied single agent learning policies to the network setting need not perform well in terms of regret. In particular, we identify a class of non-altruistic and individually consistent policies, and argue by deriving regret lower bounds that they are liable to suffer a large regret in the networked setting. We also show that the learning performance can be substantially improved if the agents exploit the structure of the network, and develop a simple learning algorithm based on dominating sets of the network. Specifically, we first consider a star network, which is a common motif in hierarchical social networks, and show analytically that the hub agent can be used as an information sink to expedite learning and improve the overall regret. We also derive networkwide regret bounds for the algorithm applied to general networks. We conduct numerical experiments on a variety of networks to corroborate our analytical results.},
author = {Kolla, Ravi Kumar and Jagannathan, Krishna and Gopalan, Aditya},
booktitle = {Annual Allerton Conference on Communication, Control, and Computing},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Kolla, Jagannathan, Gopalan - 2016 - Collaborative learning of stochastic bandits over a social etwork.pdf:pdf},
title = {{Collaborative learning of stochastic bandits over a social network}},
year = {2016}
}
@inproceedings{hauskrecht2007evidence-based,
abstract = {Anomaly detection methods can be very useful in identifying interesting or concerning events. In this work, we develop and examine new probabilistic anomaly detection methods that let us evaluate management decisions for a specific patient and identify those decisions that are highly unusual with respect to patients with the same or similar condition. The statistics used in this detection are derived from probabilistic models such as Bayesian networks that are learned from a database of past patient cases. We evaluate our methods on the problem of detection of unusual hospitalization patterns for patients with community acquired pneumonia. The results show very encouraging detection performance with 0.5 precision at 0.53 recall and give us hope that these techniques may provide the basis of intelligent monitoring systems that alert clinicians to the occurrence of unusual events or decisions.},
author = {Hauskrecht, Milos and Valko, Michal and Kveton, Branislav and Visweswaran, Shyam and Cooper, Gregory F},
booktitle = {Annual American Medical Informatics Association Symposium},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = nov,
pages = {319--324},
title = {{Evidence-based anomaly detection}},
year = {2007}
}
@book{papadimitriou1998combinatorial,
author = {Papadimitriou, Christos and Steiglitz, Kenneth},
publisher = {Dover Publications},
title = {{Combinatorial Optimization}},
year = {1998}
}
@article{hastings1970monte,
author = {Hastings, W K},
journal = {Biometrika},
pages = {97--109},
title = {{{Monte Carlo} Sampling Methods Using {Markov} Chains and Their Applications}},
volume = {57},
year = {1970}
}
@article{Haasdonk2010,
author = {Haasdonk, Bernard and Pekalska, Elzbieta},
journal = {Advances in Data Analysis, Data Handling and Business Intelligence},
pages = {351--361},
publisher = {Springer},
title = {{Classification with kernel Mahalanobis distance classifiers}},
year = {2010}
}
@inproceedings{ratliff07subgradient,
author = {Ratliff, Nathan and Bagnell, James (Drew) and Zinkevich, Martin},
booktitle = {Proceedings of the Eleventh International Conference on Artificial Intelligence and Statistics},
pages = {380--387},
title = {{(Online) Subgradient Methods for Structured Prediction}},
volume = {2},
year = {2007}
}
@article{ahmed2011maximizing,
author = {Ahmed, S and Atamt{\"{u}}rk, A},
journal = {Mathematical Programming: Series A and B},
number = {1-2},
pages = {149--169},
publisher = {Springer-Verlag New York, Inc.},
title = {{Maximizing a class of submodular utility functions}},
volume = {128},
year = {2011}
}
@inproceedings{GGLB11,
author = {Gabillon, V and Ghavamzadeh, M and Lazaric, A and Bubeck, S},
booktitle = {Neural Information Processing Systems (NIPS)},
title = {{Multi-Bandit Best Arm Identification}},
year = {2011}
}
@inproceedings{boyan2001exact,
author = {Boyan, Justin and Littman, Michael},
booktitle = {Advances in Neural Information Processing Systems 13},
pages = {1026--1032},
title = {{Exact Solutions to Time-Dependent {MDPs}}},
year = {2001}
}
@incollection{filippi2010parametric,
author = {Filippi, Sarah and Cappe, Olivier and Garivier, Aur{\'{e}}lien and Szepesvari, Csaba},
booktitle = {Advances in Neural Information Processing Systems 23},
editor = {Lafferty, J and Williams, C K I and Shawe-Taylor, J and Zemel, R S and Culotta, A},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {586--594},
title = {{Parametric Bandits: The Generalized Linear Case}},
year = {2010}
}
@article{RP11,
author = {Ryzhov, I and Powell, W},
journal = {Operations Research},
pages = {188--201},
title = {{Information Collection on a Graph}},
volume = {59},
year = {2011}
}
@inproceedings{collins04perceptron,
address = {Morristown, NJ, USA},
author = {Collins, Michael and Roark, Brian},
booktitle = {ACL '04: Proceedings of the 42nd Annual Meeting of the Association for Computational Linguistics},
doi = {10.3115/1218955.1218970},
pages = {111--118},
publisher = {Association for Computational Linguistics},
title = {{Incremental parsing with the perceptron algorithm}},
year = {2004}
}
@inproceedings{ghahramani2000graphical,
annote = {comps{\_}models},
author = {Ghahramani, Zoubin and Beal, Matthew J},
booktitle = {Advanced Mean Field Methods - Theory and Practice},
publisher = {MIT Press},
title = {{Graphical models and variational methods}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.31.7693},
year = {2000}
}
@incollection{torresani2007large,
address = {Cambridge, MA},
annote = {comps{\_}distance},
author = {Torresani, Lorenzo and Lee, Kuang-chih},
booktitle = {Advances in Neural Information Processing Systems 19},
editor = {Scholkopf, B and Platt, J and Hoffman, T},
pages = {1385--1392},
publisher = {MIT Press},
title = {{Large Margin Component Analysis}},
url = {http://books.nips.cc/papers/files/nips19/NIPS2006{\_}0791.pdf},
year = {2007}
}
@inproceedings{korda2016distributed,
abstract = {We provide two distributed confidence ball algorithms for solving linear bandit problems in peer to peer networks with limited communication capabilities. For the first, we assume that all the peers are solving the same linear bandit problem, and prove that our algorithm achieves the optimal asymptotic regret rate of any centralised algorithm that can instantly communicate information between the peers. For the second, we assume that there are clusters of peers solving the same bandit problem within each cluster, and we prove that our algorithm discovers these clusters, while achieving the optimal asymptotic regret rate within each one. Through experiments on several real-world datasets, we demonstrate the performance of proposed algorithms compared to the state-of-the-art.},
author = {Korda, Nathan and Sz{\"{o}}r{\'{e}}nyi, Bal{\'{a}}zs and Li, Shuai},
booktitle = {International Conference on Machine Learning},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Korda, Szorenyi, Li - 2016 - Distributed Clustering of Linear Bandits in Peer to Peer Networks.pdf:pdf},
month = apr,
title = {{Distributed clustering of linear bandits in peer to peer networks}},
year = {2016}
}
@incollection{suehiro12submodular,
author = {Suehiro, Daiki and Hatano, Kohei and Kijima, Shuji and Takimoto, Eiji and Nagano, Kiyohito},
booktitle = {Algorithmic Learning Theory},
isbn = {978-3-642-34105-2},
pages = {260--274},
publisher = {Springer Berlin Heidelberg},
series = {Lecture Notes in Computer Science},
title = {{Online Prediction under Submodular Constraints}},
volume = {7568},
year = {2012}
}
@inproceedings{jegelka2011online,
author = {Jegelka, Stefanie and Bilmes, Jeff},
booktitle = {Proceedings of the 28th International Conference on Machine Learning},
editor = {Getoor, Lise and Scheffer, Tobias},
isbn = {9781450306195},
pages = {345--352},
publisher = {ACM},
series = {ICML '11},
title = {{Online Submodular Minimization for Combinatorial Structures}},
url = {http://ssli.ee.washington.edu/{~}bilmes/mypubs/jegelka2011-online-submodular-min.extended.pdf},
year = {2011}
}
@book{Led01,
author = {Ledoux, M},
publisher = {American Mathematical Society},
title = {{The Concentration of Measure Phenomenon}},
year = {2001}
}
@article{lovasz1983submodular,
author = {Lov{\'{a}}sz, L{\'{a}}szl{\'{o}}},
editor = {Bachem, Armin and Gr{\"{o}}tschel, Martin and Korte, Bernhard H},
journal = {Mathematical programming the state of the art},
pages = {235--257},
publisher = {Berlin: Springer-Verlag},
title = {{Submodular functions and convexity}},
url = {http://www.cs.elte.hu/{~}lovasz/scans/submodular.pdf},
year = {1983}
}
@book{hansen2004global,
author = {Hansen, Eldon and Walster, William},
isbn = {9780824740597},
publisher = {Marcel Dekker},
series = {Pure and Applied Mathematics Series},
title = {{Global Optimization Using Interval Analysis: Revised and Expanded}},
url = {http://books.google.fr/books?id=tY2wAkb-zLcC},
year = {2004}
}
@article{bull2011convergence,
author = {Bull, Adam},
journal = {The Journal of Machine Learning Research},
pages = {2879--2904},
publisher = {JMLR. org},
title = {{Convergence rates of efficient global optimization algorithms}},
volume = {12},
year = {2011}
}
@inproceedings{dekel13det,
author = {Dekel, Ofer and Hazan, Elad},
booktitle = {Proceedings of the 30th International Conference on Machine Learning (ICML-13)},
editor = {Dasgupta, Sanjoy and McAllester, David},
number = {3},
pages = {675--683},
publisher = {JMLR Workshop and Conference Proceedings},
title = {{Better Rates for Any Adversarial Deterministic MDP}},
volume = {28},
year = {2013}
}
@book{bertsekas1995dynamic,
address = {Belmont, MA},
author = {Bertsekas, Dimitri},
publisher = {Athena Scientific},
title = {{Dynamic Programming and Optimal Control}},
year = {1995}
}
@techreport{BN99,
author = {Ben-Tal, A and Nemirovski, A},
institution = {MINERVA Optimization Center Report, Faculty of Industrial Engineering and Management, Technion--Israel Institute of Technology, Haifa},
title = {{The conjugate barrier mirror descent method for non-smooth convex optimization}},
year = {1999}
}
@book{vapnik1995nature,
address = {New York, NY, USA},
author = {Vapnik, Vladimir N},
isbn = {0-387-94559-8},
publisher = {Springer-Verlag New York, Inc.},
title = {{The nature of statistical learning theory}},
year = {1995}
}
@inproceedings{BMSS09,
author = {Bubeck, S and Munos, R and Stoltz, G and Szepesvari, Cs.},
booktitle = {Advances in Neural Information Processing Systems (NIPS)},
pages = {201--208},
title = {{Online Optimization in {$\mathcal{X}$}-Armed Bandits}},
year = {2009}
}
@inproceedings{chang2009inferring,
author = {Chang, Keng-hao and Hightower, Jeffrey and Kveton, Branislav},
booktitle = {Proceedings of the 7th International Conference on Pervasive Computing},
pages = {151--167},
title = {{Inferring Identity Using Accelerometers in Television Remote Controls}},
year = {2009}
}
@article{bradski2000opencv,
author = {Bradski, G},
journal = {Dr. Dobb's Journal of Software Tools},
keywords = {bibtex-import},
title = {{The OpenCV Library}},
year = {2000}
}
@inproceedings{Jenatton2010a,
author = {Jenatton, R and Mairal, J and Obozinski, G and Bach, F},
booktitle = {Proceedings of the International Conference on Machine Learning (ICML)},
title = {{Proximal Methods for Sparse Hierarchical Dictionary Learning}},
year = {2010}
}
@incollection{Combettes2010,
author = {Combettes, P L and Pesquet, J.-C.},
booktitle = {Fixed-Point Algorithms for Inverse Problems in Science and Engineering},
publisher = {Springer},
title = {{Proximal splitting methods in signal processing}},
year = {2010}
}
@article{cornuejols1977location,
author = {Cornuejols, G and Fisher, M L and Nemhauser, G L},
journal = {Management Science},
number = {8},
pages = {789--810},
publisher = {JSTOR},
title = {{Location of bank accounts to optimize float: An analytic study of exact and approximate algorithms}},
volume = {23},
year = {1977}
}
@article{boularias2013apprenticeship,
author = {Boularias, Abdeslam and Chaib-draa, Brahim},
doi = {10.1016/j.neucom.2012.11.002},
issn = {0925-2312},
journal = {Neurocomputing},
keywords = {Bootstrapping,Imitation learning,Inverse reinforcement learning,Transfer learning},
number = {0},
pages = {83--96},
title = {{Apprenticeship learning with few examples}},
url = {http://www.sciencedirect.com/science/article/pii/S0925231212008363},
volume = {104},
year = {2013}
}
@article{pickland1975statistical,
abstract = {A method is presented for making statistical inferences about the upper tail of a distribution function. It is useful for estimating the probabilities of future extremely large observations. The method is applicable if the underlying distribution function satisfies a condition which holds for all common continuous distribution functions.},
author = {Pickands, III, James},
journal = {The Annals of Statistics},
pages = {119--131},
title = {{Statistical Inference Using Extreme Order Statistics}},
volume = {3},
year = {1975}
}
@inproceedings{sutton2008dyna-style,
author = {Sutton, Richard and Szepesvari, Csaba and Geramifard, Alborz and Bowling, Michael},
booktitle = {Proceedings of the 24th Conference on Uncertainty in Artificial Intelligence},
pages = {528--536},
title = {{Dyna-Style Planning with Linear Function Approximation and Prioritized Sweeping}},
year = {2008}
}
@inproceedings{koolen2013pareto,
author = {Koolen, Wouter M.},
booktitle = {Neural Information Processing Systems},
title = {{The Pareto regret frontier}},
year = {2013}
}
@article{CBGe08,
author = {Cesa-Bianchi, N and Gentile, C},
doi = {10.1109/TIT.2007.911292},
issn = {0018-9448},
journal = {IEEE Transactions on Information Theory},
keywords = {arbitrary learning algorithm;ensemble;incremental},
number = {1},
pages = {386--390},
title = {{Improved Risk Tail Bounds for On-Line Algorithms}},
volume = {54},
year = {2008}
}
@article{erkan2009semi-supervised,
abstract = {Various supervised inference methods can be analyzed as convex duals of the generalized maximum entropy (MaxEnt) framework. Generalized MaxEnt aims to find a distribution that maximizes an entropy function while respecting prior information represented as potential functions in miscellaneous forms of constraints and/or penalties. We extend this framework to semi-supervised learning by incorporating unlabeled data via modifications to these potential functions reflecting structural assumptions on the data geometry. The proposed approach leads to a family of discriminative semi-supervised algorithms, that are convex, scalable, inherently multi-class, easy to implement, and that can be kernelized naturally. Experimental evaluation of special cases shows the competitiveness of our methodology.},
author = {Erkan, Ayse Naz and Altun, Yasemin},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Erkan, Altun - 2009 - Semi-Supervised Learning via Generalized Maximum Entropy.pdf:pdf},
journal = {Proceedings of JMLR Workshop},
keywords = {computational,information theoretic learning with statistics},
number = {September},
pages = {209--216},
publisher = {New York University},
title = {{Semi-Supervised Learning via Generalized Maximum Entropy}},
url = {http://eprints.pascal-network.org/archive/00006122/},
volume = {9},
year = {2009}
}
@article{spielman_spectral_2011,
author = {Spielman, Daniel A and Teng, Shang-Hua},
journal = {SIAM Journal on Computing},
number = {4},
pages = {981--1025},
title = {{Spectral sparsification of graphs}},
url = {http://epubs.siam.org/doi/abs/10.1137/08074489X},
volume = {40},
year = {2011}
}
@techreport{kapoor1996assessment,
author = {Kapoor, W N},
institution = {Agency for Health Policy and Research (AHCPR)},
title = {{Assessment of the Variation and Outcomes of Pneumonia: Pneumonia Patient Outcomes Research Team ({PORT}) Final Report}},
year = {1996}
}
@inproceedings{ADX10,
author = {Agarwal, Alekh and Dekel, Ofer and Xiao, Lin},
booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {28--40},
title = {{Optimal Algorithms for Online Convex Optimization with Multi-Point Bandit Feedback}},
url = {http://www.cs.berkeley.edu/{~}alekh/bandits-colt.pdf},
year = {2010}
}
@inproceedings{Schmidt2010,
author = {Schmidt, M and Murphy, K},
booktitle = {Proceedings of the International Conference on Artificial Intelligence and Statistics (AISTATS)},
title = {{Convex Structure Learning in Log-Linear Models: Beyond Pairwise Potentials}},
year = {2010}
}
@inproceedings{hkl,
author = {Bach, F},
booktitle = {Adv. NIPS},
title = {{Exploring Large Feature Spaces with Hierarchical Multiple Kernel Learning}},
year = {2008}
}
@article{Ratliff2007,
abstract = {Decision making in robotics often involves computing an optimal action for a given state, where the space of actions under consideration can potentially be large and state dependent. Many of these decision making problems can be naturally formalized in the multiclass classification framework, where actions are regarded as labels for states. One powerful approach to multiclass classification relies on learning a function that scores each action; action selection is done by returning the action with maximum score. In this work, we focus on two imitation learning problems in particular that arise in robotics. The first problem is footstep prediction for quadruped locomotion, in which the system predicts next footstep locations greedily given the current four-foot configuration of the robot over a terrain height map. The second problem is grasp prediction, in which the system must predict good grasps of complex free-form objects given an approach direction for a robotic hand. We present experimental results of applying a recently developed functional gradient technique for optimizing a structured margin formulation of the corresponding large non-linear multiclass classification problems.},
author = {Ratliff, Nathan and Bagnell, J Andrew and Srinivasa, Siddhartha S},
doi = {10.1109/ICHR.2007.4813899},
institution = {Robotics Institute},
isbn = {9781424418619},
journal = {2007 7th IEEE-RAS International Conference on Humanoid Robots},
pages = {392--397},
publisher = {IEEE},
title = {{Imitation learning for locomotion and manipulation}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4813899},
year = {2007}
}
@article{Ipsen11ergodicitycoeff,
author = {Ipsen, Ilse C F and Selee, Teresa M},
journal = {SIAM J. Matrix Analysis Applications},
number = {1},
pages = {153--200},
title = {{Ergodicity Coefficients Defined by Vector Norms}},
volume = {32},
year = {2011}
}
@article{glynn1990likelihood,
author = {Glynn, Peter},
journal = {Communications of the ACM},
number = {10},
pages = {75--84},
title = {{Likelihood Ratio Gradient Estimation for Stochastic Systems}},
volume = {33},
year = {1990}
}
@article{Zhao2009,
author = {Zhao, P and Rocha, G and Yu, B},
journal = {Annals of Statistics},
number = {6A},
pages = {3468--3497},
title = {{The composite absolute penalties family for grouped and hierarchical variable selection}},
volume = {37},
year = {2009}
}
@inproceedings{chung1999dynamic,
author = {Chung, Eui-Young and Benini, Luca and de Micheli, Giovanni},
booktitle = {Proceedings of the 1999 IEEE / ACM International Conference on Computer-Aided Design},
pages = {274--279},
title = {{Dynamic Power Management Using Adaptive Learning Tree}},
year = {1999}
}
@inproceedings{augustin,
author = {Lef{\`{e}}vre, A and Bach, F and F{\'{e}}votte, C},
booktitle = {Proceedings of the International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
title = {{Itakura-{S}aito Nonnegative Matrix Factorization With Group Sparsity}},
year = {2011}
}
@book{rockafellar81theory,
author = {Rockafellar, R Tyrell},
publisher = {Heldermann Verlag, Berlin},
title = {{The theory of subgradients and its applications to problems of optimization: Convex and nonconvex functions}},
year = {1981}
}
@inproceedings{farias2005exploration-exploitation,
author = {de Farias, Daniela Pucci and Megiddo, Nimrod},
booktitle = {Advances in Neural Information Processing Systems 17},
pages = {409--416},
title = {{Exploration-Exploitation Tradeoffs for Experts Algorithms in Reactive Environments}},
year = {2005}
}
@incollection{klein2012inverse,
author = {Klein, Edouard and Geist, Matthieu and Piot, Bilal and Pietquin, Olivier},
booktitle = {Advances in Neural Information Processing Systems 25},
editor = {Bartlett, P and Pereira, F C N and Burges, C J C and Bottou, L and Weinberger, K Q},
pages = {1016--1024},
title = {{Inverse Reinforcement Learning through Structured Classification}},
url = {http://books.nips.cc/papers/files/nips25/NIPS2012{\_}0491.pdf},
year = {2012}
}
@article{chen2015combinatorial,
abstract = {We define a general framework for a large class of combinatorial multi-armed bandit (CMAB) problems, where subsets of base arms with unknown distributions form super arms. In each round, a super arm is played and the base arms contained in the super arm are played and their outcomes are observed. We further consider the extension in which more based arms could be probabilistically triggered based on the outcomes of already triggered arms. The reward of the super arm depends on the outcomes of all played arms, and it only needs to satisfy two mild assumptions, which allow a large class of nonlinear reward instances. We assume the availability of an offline ($\backslash$alpha,$\backslash$beta)-approximation oracle that takes the means of the outcome distributions of arms and outputs a super arm that with probability {\{}$\backslash$beta{\}} generates an {\{}$\backslash$alpha{\}} fraction of the optimal expected reward. The objective of an online learning algorithm for CMAB is to minimize ($\backslash$alpha,$\backslash$beta)-approximation regret, which is the difference between the $\backslash$alpha{\{}$\backslash$beta{\}} fraction of the expected reward when always playing the optimal super arm, and the expected reward of playing super arms according to the algorithm. We provide CUCB algorithm that achieves O(log n) distribution-dependent regret, where n is the number of rounds played, and we further provide distribution-independent bounds for a large class of reward functions. Our regret analysis is tight in that it matches the bound of UCB1 algorithm (up to a constant factor) for the classical MAB problem, and it significantly improves the regret bound in a earlier paper on combinatorial bandits with linear rewards. We apply our CMAB framework to two new applications, probabilistic maximum coverage and social influence maximization, both having nonlinear reward structures. 
In particular, application to social influence maximization requires our extension on probabilistically triggered arms.},
author = {Chen, Wei and Wang, Yajun and Yuan, Yang},
journal = {Journal of Machine Learning Research},
title = {{Combinatorial multi-armed bandit and its extension to probabilistically triggered arms}},
volume = {17},
year = {2016}
}
@inproceedings{menzies2006bayesian,
annote = {comps{\_}anX},
author = {Menzies, T and Allen, D and Orrego, A},
booktitle = {Proceedings of the Machine Learning Algorithms for Surveillance and Event Detection Workshop},
title = {{Bayesian Anomaly Detection}},
url = {http://menzies.us/pdf/06bad.pdf},
year = {2006}
}
@article{gilks1992derivative,
author = {Gilks, W. R.},
journal = {Bayesian Statistics},
title = {{Derivative-free adaptive rejection sampling for Gibbs sampling}},
volume = {4},
year = {1992}
}
@inproceedings{valko2008conditional,
abstract = {Anomaly detection methods can be very useful in identifying unusual or interesting patterns in data. A recently proposed conditional anomaly detection framework extends anomaly detection to the problem of identifying anomalous patterns on a subset of attributes in the data. The anomaly always depends (is conditioned) on the value of remaining attributes. The work presented in this paper focuses on instance-based methods for detecting conditional anomalies. The methods rely on the distance metric to identify examples in the dataset that are most critical for detecting the anomaly. We investigate various metrics and metric learning methods to optimize the performance of the instance-based anomaly detection methods. We show the benefits of the instance-based methods on two real-world detection problems: detection of unusual admission decisions for patients with the community-acquired pneumonia and detection of unusual orders of an HPF4 test that is used to confirm Heparin induced thrombocytopenia - a life-threatening condition caused by the Heparin therapy.},
author = {Valko, Michal and Cooper, Gregory F and Seybert, Amy and Visweswaran, Shyam and Saul, Melissa and Hauskrecht, Milos},
booktitle = {Workshop on Machine Learning in Health Care Applications in The 25th International Conference on Machine Learning},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{Conditional anomaly detection methods for patient-management alert systems}},
year = {2008}
}
@article{ching2015one,
author = {Ching, Avery and Edunov, Sergey and Kabiljo, Maja and Logothetis, Dionysios and Muthukrishnan, Sambavi},
journal = {Proceedings of the VLDB Endowment},
number = {12},
pages = {1804--1815},
publisher = {VLDB Endowment},
title = {{One trillion edges: graph processing at Facebook-scale}},
volume = {8},
year = {2015}
}
@inproceedings{chambolle2005total,
author = {Chambolle, A},
booktitle = {Energy Minimization Methods in Computer Vision and Pattern Recognition},
organization = {Springer},
pages = {136--152},
title = {{Total variation minimization and a class of binary {MRF} models}},
year = {2005}
}
@article{heard2010bayesian,
author = {Heard, Nicholas A and Weston, David J and Platanioti, Kiriaki and Hand, David J},
doi = {10.1214/10-AOAS329SUPPB},
journal = {Annals of Applied Statistics},
pages = {645--662},
title = {{Bayesian anomaly detection methods for social networks}},
volume = {4},
year = {2010}
}
@inproceedings{liu2010large,
author = {Liu, W and He, Junfeng and Chang, Shih-Fu},
booktitle = {ICML},
title = {{Large Graph Construction for Scalable Semi-Supervised Learning}},
year = {2010}
}
@inproceedings{valko2011conditional,
abstract = {Timely detection of concerning events is an important problem in clinical
practice. In this paper, we consider the problem of conditional anomaly
detection that aims to identify data instances with an unusual response,
such as the omission of an important lab test. We develop a new non-parametric
approach for conditional anomaly detection based on the soft harmonic
solution, with which we estimate the confidence of the label to detect
anomalous mislabeling. We further regularize the solution to avoid
the detection of isolated examples and examples on the boundary of
the distribution support. We demonstrate the efficacy of the proposed
method in detecting unusual labels on a real-world electronic health
record dataset and compare it to several baseline approaches.},
author = {Valko, Michal and Valizadegan, Hamed and Kveton, Branislav and Cooper, Gregory F and Hauskrecht, Milos},
booktitle = {The 28th International Conference on Machine Learning Workshop on Machine Learning for Global Challenges},
keywords = {misovalko},
mendeley-tags = {misovalko},
month = {jun},
title = {{Conditional Anomaly Detection Using Soft Harmonic Functions: An Application to Clinical Alerting}},
year = {2011}
}
@inproceedings{cevher,
author = {Krause, A and Cevher, V},
booktitle = {Proc. ICML},
title = {{Submodular dictionary selection for sparse representation}},
year = {2010}
}
@article{Wil96,
author = {Willems, F M J},
journal = {IEEE Transactions on Information Theory},
pages = {2210--2217},
title = {{Coding for a binary independent piecewise-identically-distributed source}},
volume = {IT-42},
year = {1996}
}
@inproceedings{bao2016online,
abstract = {Social networks have been popular platforms for information propagation. An important use case is viral marketing: given a promotion budget, an advertiser can choose some influential users as the seed set and provide them free or discounted sample products; in this way, the advertiser hopes to increase the popularity of the product in the users' friend circles by the world-of-mouth effect, and thus maximizes the number of users that information of the production can reach. There has been a body of literature studying the influence maximization problem. Nevertheless, the existing studies mostly investigate the problem on a one-off basis, assuming fixed known influence probabilities among users, or the knowledge of the exact social network topology. In practice, the social network topology and the influence probabilities are typically unknown to the advertiser, which can be varying over time, i.e., in cases of newly established, strengthened or weakened social ties. In this paper, we focus on a dynamic non-stationary social network and design a randomized algorithm, RSB, based on multi-armed bandit optimization, to maximize influence propagation over time. The algorithm produces a sequence of online decisions and calibrates its explore-exploit strategy utilizing outcomes of previous decisions. It is rigorously proven to achieve an upper-bounded regret in reward and applicable to large-scale social networks. Practical effectiveness of the algorithm is evaluated using both synthetic and real-world datasets, which demonstrates that our algorithm outperforms previous stationary methods under non-stationary conditions.},
author = {Bao, Yixin and Wang, Xiaoke and Wang, Zhi and Wu, Chuan and Lau, Francis C. M.},
booktitle = {International Symposium on Quality of Service},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Bao et al. - 2016 - Online Influence Maximization in Non-Stationary Social Networks.pdf:pdf},
month = {apr},
title = {{Online influence maximization in non-stationary social networks}},
year = {2016}
}
@article{farias2003linear,
author = {de Farias, Daniela Pucci and {Van Roy}, Benjamin},
journal = {Operations Research},
number = {6},
pages = {850--856},
title = {{The Linear Programming Approach to Approximate Dynamic Programming}},
volume = {51},
year = {2003}
}
@unpublished{BaPaSzSz11-online,
annote = {From Duplicate 1 (Online learning - Bartok, G; Pal, D; Szepesvari, C; Szita, I)
Lecture Notes
From Duplicate 2 (Online Learning - Bart{\'{o}}k, G; P{\'{a}}l, D; Szepesv{\'{a}}ri, Cs.; Szita, I)
https://moodle.cs.ualberta.ca/file.php/354/notes.pdf},
author = {Bart{\'{o}}k, G and P{\'{a}}l, D and Szepesv{\'{a}}ri, Cs. and Szita, I},
howpublished = {Lecture notes, University of Alberta},
title = {{Online learning}},
year = {2011}
}
@article{wong2005what's,
address = {Cambridge, MA, USA},
author = {Wong, Weng-Keen and Moore, Andrew and Cooper, Gregory and Wagner, Michael},
issn = {1533-7928},
journal = {J. Mach. Learn. Res.},
pages = {1961--1998},
publisher = {MIT Press},
title = {{What's Strange About Recent Events (WSARE): An Algorithm for the Early Detection of Disease Outbreaks}},
volume = {6},
year = {2005}
}
@inproceedings{UNK10,
author = {Uchiya, T and Nakamura, A and Kudo, M},
booktitle = {Proceedings of the 21st International Conference on Algorithmic Learning Theory (ALT)},
title = {{Algorithms for Adversarial Bandit Problems with Multiple Plays}},
year = {2010}
}
@article{nagano2007strongly,
author = {Nagano, K},
journal = {Discrete Optimization},
number = {3--4},
pages = {349--359},
publisher = {Elsevier},
title = {{A strongly polynomial algorithm for line search in submodular polyhedra}},
volume = {4},
year = {2007}
}
@inproceedings{hoi2006learning,
address = {Washington, DC, USA},
annote = {comps{\_}distancX},
author = {Hoi, Steven C H and Liu, Wei and Lyu, Michael R and Ma, Wei-Ying},
booktitle = {CVPR '06: Proceedings of the 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition},
doi = {10.1109/CVPR.2006.167},
isbn = {0-7695-2597-0},
pages = {2072--2078},
publisher = {IEEE Computer Society},
title = {{Learning Distance Metrics with Contextual Constraints for Image Retrieval}},
year = {2006}
}
@inproceedings{tran-thang2012knapsack,
author = {Tran-Thanh, Long and Chapman, Archie C. and Rogers, Alex and Jennings, Nicholas R.},
booktitle = {AAAI},
title = {{Knapsack Based Optimal Policies for Budget-Limited Multi-Armed Bandits}},
urldate = {2014-10-16},
year = {2012}
}
@inproceedings{narang2013signal,
abstract = {In this paper, we propose a novel algorithm to interpolate data defined on graphs, using signal processing concepts. The interpolation of missing values from known samples appears in various applications, such as matrix/vector completion, sampling of high-dimensional data, semi-supervised learning etc. In this paper, we formulate the data interpolation problem as a signal reconstruction problem on a graph, where a graph signal is defined as the information attached to each node (scalar or vector values mapped to the set of vertices/edges of the graph). We use recent results for sampling in graphs to find classes of bandlimited (BL) graph signals that can be reconstructed from their partially observed samples. The interpolated signal is obtained by projecting the input signal into the appropriate BL graph signal space. Additionally, we impose a `bilateral' weighting scheme on the links between known samples, which further improves accuracy. We use our proposed method for collaborative filtering in recommendation systems. Preliminary results show a very favorable trade-off between accuracy and complexity, compared to state of the art algorithms.},
author = {Narang, Sunil K. and Gadde, Akshay and Ortega, Antonio},
booktitle = {International Conference on Acoustics, Speech and Signal Processing},
keywords = {Graph signal processing,recommendation systems,sampling in graphs,spectral graph theory},
title = {{Signal processing techniques for interpolation in graph structured data}},
year = {2013}
}
@book{GKKW02,
author = {Gy{\"{o}}rfi, L and Kohler, M and Krzyzak, A and Walk, H},
publisher = {Springer},
title = {{A Distribution-Free Theory of Nonparametric Regression}},
year = {2002}
}
@inproceedings{lazarevic2005feature,
address = {New York, NY, USA},
annote = {comps{\_}ano},
author = {Lazarevic, Aleksandar and Kumar, Vipin},
booktitle = {KDD '05: Proceedings of the eleventh ACM SIGKDD international conference on Knowledge discovery in data mining},
doi = {10.1145/1081870.1081891},
isbn = {1-59593-135-X},
pages = {157--166},
publisher = {ACM},
title = {{Feature bagging for outlier detection}},
year = {2005}
}
@article{gilks1995adaptive,
abstract = {Gibbs sampling is a powerful technique for statistical inference. It involves little more than sampling from full conditional distributions, which can be both complex and computationally expensive to evaluate. Gilks and Wild have shown that in practice full conditionals are often log-concave, and they proposed a method of adaptive rejection sampling for efficiently sampling from univariate log-concave distributions. In this paper, to deal with non-log-concave full conditional distributions, we generalize adaptive rejection sampling to include a Hastings-Metropolis algorithm step. One important field of application in which statistical models may lead to non-log-concave full conditionals is population pharmacokinetics. Here, the relationship between drug dose and blood or plasma concentration in a group of patients typically is modelled by using non-linear mixed effects models. Often, the data used for analysis are routinely collected hospital measurements, which tend to be noisy and irregular. Consequently, a robust (t-distributed) error structure is appropriate to account for outlying observations and/or patients. We propose a robust non-linear full probability model for population pharmacokinetic data. We demonstrate that our method enables Bayesian inference for this model, through an analysis of antibiotic administration in new-born babies.},
author = {Gilks, W. R. and Best, N. G. and Tan, K. K. C.},
journal = {Journal of the Royal Statistical Society. Series C (Applied Statistics)},
keywords = {bayesian computation,gibbs sampling,markov chain monte carlo,method,metropolis algorithm,pharmacokinetic model,random variate generation},
number = {4},
pages = {455--472},
title = {{Adaptive rejection metropolis sampling within Gibbs sampling}},
volume = {44},
year = {1995}
}
@inproceedings{cai2014comparison,
author = {Cai, Zhuhua and Gao, Zekai J and Luo, Shangyu and Perez, Luis L and Vagena, Zografoula and Jermaine, Christopher},
booktitle = {SIGMOD},
title = {{A comparison of platforms for implementing and running very large scale machine learning algorithms}},
year = {2014}
}
@article{dean1989model,
author = {Dean, Thomas and Kanazawa, Keiji},
journal = {Computational Intelligence},
pages = {142--150},
title = {{A Model for Reasoning about Persistence and Causation}},
volume = {5},
year = {1989}
}
@inproceedings{Vov90,
author = {Vovk, V},
booktitle = {Proceedings of the third annual workshop on Computational learning theory (COLT)},
pages = {371--386},
title = {{Aggregating strategies}},
year = {1990}
}
@book{GP,
author = {Rasmussen, C E and Williams, C},
publisher = {MIT Press},
title = {{Gaussian Processes for Machine Learning}},
year = {2006}
}
@article{astrom1965optimal,
author = {{\AA}str{\"o}m, Karl},
journal = {Journal of Mathematical Analysis and Applications},
number = {1},
pages = {174--205},
title = {{Optimal Control of {Markov} Processes with Incomplete State Information}},
volume = {10},
year = {1965}
}
@article{neill2010multivariate,
abstract = {Abstract We present the multivariate Bayesian scan statistic (MBSS), a general framework for event detection and characterization in$\backslash$nmultivariate spatial time series data. MBSS integrates prior information and observations from multiple data streams in a$\backslash$nprincipled Bayesian framework, computing the posterior probability of each type of event in each space-time region. MBSS learns$\backslash$na multivariate Gamma-Poisson model from historical data, and models the effects of each event type on each stream using expert$\backslash$nknowledge or labeled training examples. We evaluate MBSS on various disease surveillance tasks, detecting and characterizing$\backslash$noutbreaks injected into three streams of Pennsylvania medication sales data. We demonstrate that MBSS can be used both as$\backslash$na “general” event detector, with high detection power across a variety of event types, and a “specific” detector that incorporates$\backslash$nprior knowledge of an event's effects to achieve much higher detection power. MBSS has many other advantages over previous$\backslash$nevent detection approaches, including faster computation and easy interpretation and visualization of results, and allows$\backslash$nfaster and more accurate event detection by integrating information from the multiple streams. Most importantly, MBSS can$\backslash$nmodel and differentiate between multiple event types, thus distinguishing between events requiring urgent responses and other,$\backslash$nless relevant patterns in the data.},
author = {Neill, Daniel B. and Cooper, Gregory F.},
doi = {10.1007/s10994-009-5144-4},
issn = {0885-6125},
journal = {Machine Learning},
pages = {261--282},
title = {{A multivariate Bayesian scan statistic for early event detection and characterization}},
volume = {79},
year = {2010}
}
@article{balluchi00automotiveengine,
author = {Balluchi, A and Benvenuti, L and {Di Benedetto}, M D and Pinello, C and Sangiovanni-Vincentelli, A L},
journal = {Proceedings of the IEEE},
pages = {888--912},
title = {{Automotive engine control and hybrid systems: challenges and opportunities}},
year = {2000}
}
@book{fujishige2005submodular,
author = {Fujishige, Satoru},
series = {Annals of discrete mathematics},
title = {{Submodular functions and optimization}},
year = {2005}
}
@inproceedings{boularias2011model,
abstract = {We consider the problem of imitation learning where the examples, demonstrated by an expert, cover only a small part of a large state space. Inverse Reinforcement Learning (IRL) provides an efficient tool for generalizing the demonstration, based on the assumption that the expert is optimally acting in a Markov Decision Process (MDP). Past work on IRL requires that an accurate model of the underlying MDP is known. However, this requirement can hardly be satisfied in practice, as learning a model of a dynamical system with a large, or continuous, state space is a challenging task. In this paper, we propose a model-free IRL algorithm, where the relative entropy between the empirical distribution of the trajectories under a uniform policy and their distribution under the learned policy is minimized by stochastic gradient descent. We compare this new approach to well-known IRL algorithms using approximate MDP models. Empirical results on simulated car racing, gridworld and ball-in-a-cup problems show that our approach is able to learn good policies from a small number of demonstrations.},
author = {Boularias, Abdeslam and Kober, Jens and Peters, Jan},
booktitle = {Proceedings of Fourteenth International Conference on Artificial Intelligence and Statistics},
keywords = {learning,statistics {\&} optimisation},
pages = {182--189},
title = {{Model-free inverse reinforcement learning}},
url = {http://eprints.pascal-network.org/archive/00008041/},
year = {2011}
}
@inproceedings{davis2007information-theoretic,
address = {New York, NY, USA},
annote = {comps{\_}distancX},
author = {Davis, Jason V and Kulis, Brian and Jain, Prateek and Sra, Suvrit and Dhillon, Inderjit S},
booktitle = {ICML '07: Proceedings of the 24th international conference on Machine learning},
doi = {10.1145/1273496.1273523},
isbn = {978-1-59593-793-3},
pages = {209--216},
publisher = {ACM},
title = {{Information-theoretic metric learning}},
year = {2007}
}
@article{fine2001efficient,
author = {Fine, S and Scheinberg, K},
journal = {Journal of Machine Learning Research},
pages = {243--264},
title = {{Efficient {SVM} training using low-rank kernel representations}},
volume = {2},
year = {2001}
}
@inproceedings{neu12o-ssp,
author = {Neu, Gergely and Gy{\"{o}}rgy, Andr{\'{a}}s and Szepesv{\'{a}}ri, Csaba},
booktitle = {Proceedings of the 23rd Annual Conference on Learning Theory (COLT)},
pages = {231--243},
title = {{The Online Loop-free Stochastic Shortest-Path Problem}},
year = {2010}
}
@article{burnetas1996optimal,
abstract = {Consider the problem of sequential sampling from m statistical populations to maximize the expected sum of outcomes in the long run. Under suitable assumptions on the unknown parameters u gQpopulations to maximize the expected sum of outcomes in the lon, it is shown that there exists a class CR of adaptive policies with the following properties: i. The expected n horizon reward p 0Vn u . under any policy p 0 in CR is equal to nm*u .yMu .log nqolog n., as n{\textordfeminine}`, where m*u . is the largest population mean and Mu . is a constant. ii. Policies in CR are asymptotically optimal within a larger class CUF of ‘‘uniformly fast convergent'' policies in the sense that lim . p 0 .. n{\textordfeminine}` nm* u y Vn u r nm*u .yVnp u ..F1, for any p gCUF and any u gQ such that Mu .)0. Policies in CR are specified via easily computable indices, defined as unique solutions to dual problems that arise naturally from the functional form of Mu .. In addition, the assumptions are verified for populations specified by nonparametric discrete univariate distributions with finite support. In the case of normal populations with unknown means and variances, we leave as an open problem the verification of one assumption.},
author = {Burnetas, Apostolos N. and Katehakis, Michael N.},
journal = {Advances in Applied Mathematics},
number = {2},
pages = {122--142},
title = {{Optimal adaptive policies for sequential allocation problems}},
volume = {17},
year = {1996}
}
@techreport{musco2016provably,
abstract = {We give the first algorithms for kernel matrix approximation that run in time linear in the number of data points and output an approximation which gives provable guarantees when used in many downstream learning tasks, including kernel principal component analysis, kernel {\$}k{\$}-means clustering, kernel ridge regression, and kernel canonical correlation analysis. Our methods require just {\$}\backslashtilde O(n\backslashcdot k){\$} kernel evaluations and {\$}\backslashtilde O(n \backslashcdot k{\^{}}2){\$} additional runtime, where {\$}n{\$} is the number of training data points and {\$}k{\$} is a target rank or effective dimensionality parameter. These runtimes are significantly sub-linear in the size of the {\$}n \backslashtimes n{\$} kernel matrix and apply to any kernel matrix, without assuming regularity or incoherence conditions. The algorithms are based on a ridge leverage score Nystr$\backslash$"om sampling scheme (RLS-Nystr$\backslash$"om) which was recently shown to yield strong kernel approximations, but which had no efficient implementation. We address this shortcoming by introducing fast recursive sampling methods for RLS-Nystr$\backslash$"om, while at the same time proving extended approximation guarantees for this promising new method.},
archivePrefix = {arXiv},
arxivId = {1605.07583},
author = {Musco, Cameron and Musco, Christopher},
eprint = {1605.07583},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Musco, Musco - 2016 - Provably useful kernel matrix approximation in linear time.pdf:pdf},
month = {may},
title = {{Provably useful kernel matrix approximation in linear time}},
url = {http://arxiv.org/abs/1605.07583},
year = {2016}
}
@article{Pol67,
author = {Polyak, B},
journal = {Soviet Math. Doklady},
pages = {33--36},
title = {{A general method for solving extremal problems}},
volume = {174},
year = {1967}
}
@article{ando1979concavity,
author = {Ando, T},
journal = {Linear Algebra and its Applications},
pages = {203--241},
publisher = {Elsevier},
title = {{Concavity of certain maps on positive definite matrices and applications to {\{}H{\}}adamard products}},
volume = {26},
year = {1979}
}
@article{bikel04intricacies,
address = {Cambridge, MA, USA},
author = {Bikel, Daniel M},
doi = {10.1162/0891201042544929},
issn = {0891-2017},
journal = {Computational Linguistics},
number = {4},
pages = {479--511},
publisher = {MIT Press},
title = {{Intricacies of {\{}C{\}}ollins' Parsing Model}},
volume = {30},
year = {2004}
}
@book{cover-thomas-1991,
author = {Cover, T and Thomas, J A},
publisher = {Wiley},
title = {{Elements of Information Theory}},
year = {1991}
}
@incollection{gorban2009principal,
author = {Gorban, Alexander and Zinovyev, Andrei},
booktitle = {Handbook of Research on Machine Learning Applications and Trends: Algorithms, Methods and Techniques},
pages = {28--59},
publisher = {Information Science Reference},
title = {{Principal Graphs and Manifolds}},
year = {2009}
}
@article{Kathuria2016,
abstract = {Gaussian Process bandit optimization has emerged as a powerful tool for optimizing noisy black box functions. One example in machine learning is hyper-parameter optimization where each evaluation of the target function requires training a model which may involve days or even weeks of computation. Most methods for this so-called "Bayesian optimization" only allow sequential exploration of the parameter space. However, it is often desirable to propose batches or sets of parameter values to explore simultaneously, especially when there are large parallel processing facilities at our disposal. Batch methods require modeling the interaction between the different evaluations in the batch, which can be expensive in complex scenarios. In this paper, we propose a new approach for parallelizing Bayesian optimization by modeling the diversity of a batch via Determinantal point processes (DPPs) whose kernels are learned automatically. This allows us to generalize a previous result as well as prove better regret bounds based on DPP sampling. Our experiments on a variety of synthetic and real-world robotics and hyper-parameter optimization tasks indicate that our DPP-based methods, especially those based on DPP sampling, outperform state-of-the-art methods.},
archivePrefix = {arXiv},
arxivId = {1611.04088},
author = {Kathuria, Tarun and Deshpande, Amit and Kohli, Pushmeet},
eprint = {1611.04088},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Kathuria, Deshpande, Kohli - 2016 - Batched Gaussian Process Bandit Optimization via Determinantal Point Processes.pdf:pdf},
month = {nov},
title = {{Batched Gaussian Process Bandit Optimization via Determinantal Point Processes}},
url = {http://arxiv.org/abs/1611.04088},
year = {2016}
}
@inproceedings{maddison2014a,
author = {Maddison, Chris J and Tarlow, Daniel and Minka, Tom},
booktitle = {Neural Information Processing Systems},
title = {{A* sampling}},
year = {2014}
}
@inproceedings{ICML2011Nagano_506,
author = {Nagano, K and Kawahara, Y and Aihara, K},
booktitle = {Proc. ICML},
title = {{Size-constrained Submodular Minimization through Minimum Norm Base}},
year = {2011}
}
@inproceedings{poupart2002piecewise,
author = {Poupart, Pascal and Boutilier, Craig and Patrascu, Relu and Schuurmans, Dale},
booktitle = {Proceedings of the 18th National Conference on Artificial Intelligence},
pages = {292--299},
title = {{Piecewise Linear Value Function Approximation for Factored {\{}MDPs{\}}}},
year = {2002}
}
@inproceedings{chapman2006comparison,
abstract = {Automated syndromic surveillance systems often classify patients into syndromic categories based on free-text chief complaints. Chief complaints (CC) demonstrate low to moderate sensitivity in identify-ing syndromic cases. Emergency Department (ED) reports promise more detailed clinical information that may increase sensitivity of detection.},
author = {Chapman, Wendy W and Dowling, John N and Cooper, Gregory F and Hauskrecht, Milos and Valko, Michal},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Chapman et al. - 2006 - A Comparison of Chief Complaints and Emergency Department Reports for Identifying Patients with Acute Lower Resp.pdf:pdf},
keywords = {misovalko},
mendeley-tags = {misovalko},
title = {{A Comparison of Chief Complaints and Emergency Department Reports for Identifying Patients with Acute Lower Respiratory Syndrome}},
year = {2006}
}
@inproceedings{koutis2011a-nearly-m,
author = {Koutis, Ioannis and Miller, Gary L and Peng, Richard},
booktitle = {{\{}IEEE{\}} 52nd Annual Symposium on Foundations of Computer Science, {\{}FOCS{\}}},
pages = {590--598},
title = {{A Nearly-m log n Time Solver for {\{}SDD{\}} Linear Systems}},
year = {2011}
}
@article{zong2016cascading,
author = {Zong, Shi and Ni, Hao and Sung, Kenny and Ke, Nan Rosemary and Wen, Zheng and Kveton, Branislav},
journal = {arXiv preprint arXiv:1603.05359},
title = {{Cascading Bandits for Large-Scale Recommendation Problems}},
year = {2016}
}
@book{press1992numerical,
address = {Cambridge, MA},
author = {Press, William and Teukolsky, Saul and Vetterling, William and Flannery, Brian},
publisher = {Cambridge University Press},
title = {{Numerical Recipes in C}},
year = {1992}
}
@article{belkin2006manifold,
author = {Belkin, Mikhail and Niyogi, Partha and Sindhwani, Vikas},
journal = {Journal of Machine Learning Research},
pages = {2399--2434},
title = {{Manifold regularization: A geometric framework for learning from labeled and unlabeled examples}},
volume = {7},
year = {2006}
}
@book{zhang2005schur,
author = {Zhang, Fuzhen},
publisher = {Springer},
title = {{The Schur complement and its applications}},
volume = {4},
year = {2005}
}
@inproceedings{smola2000sparse,
author = {Smola, A and Scholkopf, B},
booktitle = {Proceedings of the 17th International Conference on Machine Learning},
title = {{Sparse greedy matrix approximation for machine learning}},
year = {2000}
}
@article{carpentier2013honest,
abstract = {We study the problem of constructing honest and adaptive confidence intervals for the tail coefficient in the second order Pareto model, when the second order coefficient is unknown. This problem is translated into a testing problem on the second order parameter. By constructing an appropriate model and an associated test statistic, we provide a uniform and adaptive confidence interval for the first order parameter. We also provide an almost matching lower bound, which proves that the result is minimax optimal up to a logarithmic factor.},
archivePrefix = {arXiv},
arxivId = {1312.2968},
author = {Carpentier, Alexandra and Kim, Arlene K. H.},
eprint = {1312.2968},
journal = {Electronic Journal of Statistics},
title = {{Honest and adaptive confidence interval for the tail coefficient in the Pareto model}},
year = {2014}
}
@article{megiddo74optimal,
abstract = {The concept of an optimal flow in a multiple source, multiple sink network is defined. It generalizes maximal flow in a single source, single sink network. An existence proof and an algorithm are given.},
author = {Megiddo, Nimrod},
journal = {Mathematical Programming},
number = {1},
pages = {97--107},
title = {{Optimal flows in networks with multiple sources and sinks}},
volume = {7},
year = {1974}
}
@inproceedings{chang2004locally,
address = {New York, NY, USA},
annote = {comps{\_}distancX},
author = {Chang, Hong and Yeung, Dit-Yan},
booktitle = {ICML '04: Proceedings of the twenty-first international conference on Machine learning},
doi = {10.1145/1015330.1015391},
isbn = {1-58113-828-5},
pages = {20},
publisher = {ACM},
title = {{Locally linear metric adaptation for semi-supervised clustering}},
year = {2004}
}
@article{mcpherson2001birds,
author = {McPherson, Miller and Smith-Lovin, Lynn and Cook, James},
journal = {Annual Review of Sociology},
pages = {415--444},
title = {{Birds of a feather: Homophily in social networks}},
volume = {27},
year = {2001}
}
@book{crame1999mathematical,
author = {Cram{\'{e}}r, H},
isbn = {9780691005478},
publisher = {Princeton University Press},
series = {Princeton landmarks in mathematics and physics},
title = {{Mathematical methods of statistics}},
url = {http://books.google.com/books?id=CRTKKaJO0DYC},
year = {1999}
}
@inproceedings{busoniu2012optimistic,
abstract = {The reinforcement learning community has recently intensified its interest in online plan-ning methods, due to their relative inde-pendence on the state space size. However, tight near-optimality guarantees are not yet available for the general case of stochastic Markov decision processes and closed-loop, state-dependent planning policies. We there-fore consider an algorithm related to AO* that optimistically explores a tree represen-tation of the space of closed-loop policies, and we analyze the near-optimality of the action it returns after n tree node expan-sions. While this optimistic planning requires a finite number of actions and possible next states for each transition, its asymptotic per-formance does not depend directly on these numbers, but only on the subset of nodes that significantly impact near-optimal poli-cies. We characterize this set by introduc-ing a novel measure of problem complexity, called the near-optimality exponent. Special-izing the exponent and performance bound for some interesting classes of MDPs illus-trates the algorithm works better when there are fewer near-optimal policies and less uni-form transition probabilities.},
author = {Bu{\c{s}}oniu, Lucian and Munos, R{\'{e}}mi},
booktitle = {International Conference on Artificial Intelligence and Statistics},
title = {{Optimistic planning for Markov decision processes}},
year = {2012}
}
@article{hodge2004survey,
address = {Norwell, MA, USA},
annote = {comps{\_}ano},
author = {Hodge, Victoria and Austin, Jim},
doi = {10.1023/B:AIRE.0000045502.10941.a9},
issn = {0269-2821},
journal = {Artif. Intell. Rev.},
number = {2},
pages = {85--126},
publisher = {Kluwer Academic Publishers},
title = {{A Survey of Outlier Detection Methodologies}},
volume = {22},
year = {2004}
}
@article{queyranne1995scheduling,
author = {Queyranne, M and Schulz, A},
journal = {Integer Programming and Combinatorial Optimization},
pages = {307--320},
publisher = {Springer},
title = {{Scheduling unit jobs with compatible release dates on parallel machines with nonstationary speeds}},
volume = {920},
year = {1995}
}
@inproceedings{bennett1999semi-supervised,
author = {Bennett, Kristin and Demiriz, Ayhan},
booktitle = {Advances in Neural Information Processing Systems 11},
pages = {368--374},
title = {{Semi-Supervised Support Vector Machines}},
year = {1999}
}
@inproceedings{agrawal2013further,
abstract = {Thompson Sampling is one of the oldest heuristics for multi-armed bandit problems. It is a randomized algorithm based on Bayesian ideas, and has recently generated significant interest after several studies demonstrated it to have better empirical performance compared to the state of the art methods. In this paper, we provide a novel regret analysis for Thompson Sampling that simultaneously proves both the optimal problem-dependent bound of {\$}(1+\backslashepsilon)\backslashsum{\_}i \backslashfrac{\{}\backslashln T{\}}{\{}\backslashDelta{\_}i{\}}+O(\backslashfrac{\{}N{\}}{\{}\backslashepsilon{\^{}}2{\}}){\$} and the first near-optimal problem-independent bound of {\$}O(\backslashsqrt{\{}NT\backslashln T{\}}){\$} on the expected regret of this algorithm. Our near-optimal problem-independent bound solves a COLT 2012 open problem of Chapelle and Li. The optimal problem-dependent regret bound for this problem was first proven recently by Kaufmann et al. [ALT 2012]. Our novel martingale-based analysis techniques are conceptually simple, easily extend to distributions other than the Beta distribution, and also extend to the more general contextual bandits setting [Manuscript, Agrawal and Goyal, 2012].},
author = {Agrawal, Shipra and Goyal, Navin},
booktitle = {International Conference on Artificial Intelligence and Statistics},
title = {{Further optimal regret bounds for Thompson sampling}},
year = {2013}
}
@inproceedings{garivier2011kl,
author = {Garivier, Aur{\'{e}}lien and Capp{\'{e}}, Olivier},
booktitle = {Proceedings of the 24th annual Conference On Learning Theory},
keywords = {bandits},
mendeley-tags = {bandits},
series = {COLT '11},
title = {{The {\{}KL{\}}-{\{}UCB{\}} algorithm for bounded stochastic bandits and beyond}},
year = {2011}
}
@inproceedings{valko2005evolutionary,
abstract = {This paper presents an application of the biologically realistic JASTAP
neural network model to classification tasks. The JASTAP neural network
model is presented as an alternative to the basic multi-layer perceptron
model. An evolutionary procedure previously applied to the simultaneous
solution of feature selection and neural network training on standard
multi-layer perceptrons is extended with JASTAP model. Preliminary
results on IRIS standard data set give evidence that this extension
allows the use of smaller neural networks that can handle noisier
data without any degradation in classification accuracy.},
author = {Valko, Michal and Marques, Nuno Cavalheiro and Castellani, Marco},
booktitle = {Proceedings of 2005 Portuguese Conference on Artificial Intelligence},
editor = {Bento, Carlos and Cardoso, Am{\'{i}}lcar and Dias, Ga{\"{e}}l},
keywords = {misovalko},
mendeley-tags = {misovalko},
pages = {24--32},
publisher = {IEEE},
title = {{Evolutionary Feature Selection for Spiking Neural Network Pattern Classifiers}},
year = {2005}
}
@inproceedings{IgelHusken00:iRprop,
author = {Igel, Christian and H{\"{u}}sken, Michael},
booktitle = {Proceedings of the Second International ICSC Symposium on Neural Computation (NC 2000)},
pages = {115--121},
publisher = {ICSC Academic Press},
title = {{Improving the {\{}R{\}}prop Learning Algorithm}},
url = {http://citeseer.ist.psu.edu/igel00improving.html},
year = {2000}
}
@inproceedings{ng2004inverted,
author = {Ng, Andrew and Coates, Adam and Diel, Mark and Ganapathi, Varun and Schulte, Jamie and Tse, Ben and Berger, Eric and Liang, Eric},
booktitle = {International Symposium on Experimental Robotics},
title = {{Inverted Autonomous Helicopter Flight via Reinforcement Learning}},
year = {2004}
}
@inproceedings{bubeck2015entropic,
abstract = {We prove that the Cram$\backslash$'er transform of the uniform measure on a convex body in {\$}\backslashmathbb{\{}R{\}}{\^{}}n{\$} is a {\$}(1+o(1)) n{\$}-self-concordant barrier, improving a seminal result of Nesterov and Nemirovski. This gives the first explicit construction of a universal barrier for convex bodies with optimal self-concordance parameter. The proof is based on basic geometry of log-concave distributions, and elementary duality in exponential families.},
author = {Bubeck, S{\'{e}}bastien and Eldan, Ronen},
booktitle = {Conference on Learning Theory},
title = {{The entropic barrier: A simple and optimal universal self-concordant barrier}},
year = {2015}
}
@inproceedings{koller1999computing,
author = {Koller, Daphne and Parr, Ronald},
booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence},
pages = {1332--1339},
title = {{Computing Factored Value Functions for Policies in Structured {\{}MDPs{\}}}},
year = {1999}
}
@article{barabasi1999emergence,
abstract = {Systems as diverse as genetic networks or the world wide web are best described as networks with complex topology. A common property of many large networks is that the vertex connectivities follow a scale-free power-law distribution. This feature is found to be a consequence of the two generic mechanisms that networks expand continuously by the addition of new vertices, and new vertices attach preferentially to already well connected sites. A model based on these two ingredients reproduces the observed stationary scale-free distributions, indicating that the development of large networks is governed by robust self-organizing phenomena that go beyond the particulars of the individual systems.},
author = {Barab{\'{a}}si, Albert-L{\'{a}}szl{\'{o}} and Albert, R{\'{e}}ka},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Barab{\'{a}}si, Albert - 1999 - Emergence of scaling in random networks.pdf:pdf},
journal = {Science},
keywords = {complex networks,network,networks},
mendeley-tags = {complex networks,network,networks},
pages = {11},
title = {{Emergence of scaling in random networks}},
volume = {286},
year = {1999}
}
@book{Powell07,
author = {Powell, Warren B},
publisher = {John Wiley and Sons, New York},
title = {{Approximate Dynamic Programming: Solving the curses of dimensionality}},
year = {2007}
}
@article{Hofmann2011,
abstract = {In this paper we give an overview of and outlook on research at the intersection of information retrieval (IR) and contextual bandit problems. A critical problem in information retrieval is online learning to rank, where a search engine strives to improve the quality of the ranked result lists it presents to users on the basis of those users' interactions with those result lists. Recently, researchers have started to model interactions between users and search engines as contextual bandit problems, and initial methods for learning in this setting have been devised. Our research focuses on two aspects: balancing exploration and exploitation and inferring preferences from implicit user interactions. This paper summarizes our recent work on online learning to rank for information retrieval and points out challenges that are characteristic of this application area.},
author = {Hofmann, Katja},
journal = {NIPS 2011 Proceedings of the Conference on Neural Information Processing Systems Workshop on Bayesian Optimization Experimental Design and Bandits Theory and Applications},
pages = {1--5},
title = {{Contextual Bandits for Information Retrieval}},
url = {http://www.cs.ubc.ca/{~}hutter/nips2011workshop/papers{\_}and{\_}posters/nips-2012-rl4ir.pdf},
year = {2011}
}
@techreport{meuleau2001exploration,
author = {Meuleau, Nicolas and Peshkin, Leonid and Kim, Kee-Eung},
institution = {Massachusetts Institute of Technology},
number = {1713 (AI Memo 2001-003)},
title = {{Exploration in Gradient-Based Reinforcement Learning}},
year = {2001}
}
@inproceedings{amit02parametric,
author = {Amit, R and Mataric, Maja J},
pages = {944--945},
title = {{A Correspondence Metric for Imitation}}
}
@inproceedings{chen2010scalable,
author = {Chen, Wei and Wang, Chi and Wang, Yajun},
booktitle = {Knowledge Discovery and Data Mining},
title = {{Scalable influence maximization for prevalent viral marketing in large-scale social networks}},
year = {2010}
}
@article{mannor2004sample,
author = {Mannor, S and Tsitsiklis, J N},
journal = {Journal of Machine Learning Research},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {623--648},
title = {{The Sample Complexity of Exploration in the Multi-Armed Bandit Problem}},
volume = {5},
year = {2004}
}
@inproceedings{syed2010unsupervised,
author = {Syed, Zeeshan and Rubinfeld, Ilan},
booktitle = {ICML},
editor = {F{\"{u}}rnkranz, Johannes and Joachims, Thorsten},
pages = {1023--1030},
publisher = {Omnipress},
title = {{Unsupervised Risk Stratification in Clinical Datasets: Identifying Patients at Risk of Rare Outcomes}},
year = {2010}
}
@article{choquet1953theory,
author = {Choquet, G},
journal = {Ann. Inst. Fourier},
pages = {131--295},
title = {{Theory of capacities}},
volume = {5},
year = {1954}
}
@unpublished{Bub11,
annote = {Lecture Notes},
author = {Bubeck, S},
title = {{Introduction to Online Optimization}},
year = {2011}
}
@article{heckerman1995learning,
author = {Heckerman, D and Geiger, D and Chickering, D M},
journal = {Machine Learning},
number = {3},
pages = {197--243},
publisher = {Springer},
title = {{Learning {\{}B{\}}ayesian networks: The combination of knowledge and statistical data}},
volume = {20},
year = {1995}
}
@inproceedings{negahban2008joint,
author = {Negahban, S and Wainwright, M J},
booktitle = {Adv. NIPS},
title = {{Joint support recovery under high-dimensional scaling: Benefits and perils of $\ell_{1,\infty}$-regularization}},
year = {2008}
}
@incollection{taskar2004max-margin,
address = {Cambridge, MA},
author = {Taskar, Ben and Guestrin, Carlos and Koller, Daphne},
booktitle = {Advances in Neural Information Processing Systems 16},
editor = {Thrun, Sebastian and Saul, Lawrence and Sch{\"{o}}lkopf, Bernhard},
keywords = {Markov models,PAC bounds,graphical models,kernel methods,large margin methods,machine learning,quadratic programming,statistical learning theory,structured data},
publisher = {MIT Press},
title = {{Max-Margin Markov Networks}},
year = {2004}
}
@article{cardoso2003dependence,
author = {Cardoso, J F},
journal = {The Journal of Machine Learning Research},
pages = {1177--1203},
publisher = {JMLR. org},
title = {{Dependence, correlation and Gaussianity in independent component analysis}},
volume = {4},
year = {2003}
}
@article{yuille2003concave,
author = {Yuille, A L and Rangarajan, A},
journal = {Neural Computation},
number = {4},
pages = {915--936},
publisher = {MIT Press},
title = {{The concave-convex procedure}},
volume = {15},
year = {2003}
}
@phdthesis{kassel1995comparison,
address = {Cambridge, MA, USA},
author = {Kassel, Robert Howard},
publisher = {Massachusetts Institute of Technology},
school = {MIT Spoken Language Systems Group},
title = {{A comparison of approaches to on-line handwritten character recognition}},
year = {1995}
}
@article{manne1960linear,
author = {Manne, Alan},
journal = {Management Science},
number = {3},
pages = {259--267},
title = {{Linear Programming and Sequential Decisions}},
volume = {6},
year = {1960}
}
@inproceedings{NeuTDK2006,
author = {Neu, Gergely},
booktitle = {BME-VIK TDK'06},
title = {{Inverse Reinforcement Learning via the Method of Natural Gradients}},
year = {2006}
}
@book{HL01,
author = {Hiriart-Urruty, J.-B. and Lemar{\'{e}}chal, C},
publisher = {Springer},
title = {{Fundamentals of Convex Analysis}},
year = {2001}
}
@phdthesis{neu13thesis,
author = {Neu, Gergely},
school = {Budapest University of Technology and Economics},
title = {{Online learning in non-stochastic {\{}M{\}}arkov decision processes}},
year = {2013}
}
@inproceedings{moonesignhe2006outlier,
address = {Washington, DC, USA},
author = {Moonesignhe, H D K and Tan, Pang-Ning},
booktitle = {ICTAI '06: Proceedings of the 18th IEEE International Conference on Tools with Artificial Intelligence},
doi = {10.1109/ICTAI.2006.94},
isbn = {0-7695-2728-0},
pages = {532--539},
publisher = {IEEE Computer Society},
title = {{Outlier Detection Using Random Walks}},
year = {2006}
}
@article{chomsky56,
author = {Chomsky, N},
journal = {IEEE Transactions on Information Theory},
keywords = {3mdl,cfg,chomsky,fsm,language},
number = {3},
pages = {113--124},
title = {{Three models for the description of language}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=1056813},
volume = {2},
year = {1956}
}
@article{jewell1963markov-renewal,
author = {Jewell, William},
journal = {Operations Research},
number = {6},
pages = {938--948},
title = {{{\{}Markov{\}}-Renewal Programming. {\{}I{\}}: Formulation, Finite Return Models}},
volume = {11},
year = {1963}
}
@inproceedings{valko2012semi-supervised,
abstract = {In apprenticeship learning we aim to learn a good policy by observing the behavior of an expert or a set of experts. In particular, we consider the case where the expert acts so as to maximize an unknown reward function defined as a linear combination of a set of state features. In this paper, we consider the setting where we observe many sample trajectories (i.e., sequences of states) but only one or a few of them are labeled as experts' trajectories. We investigate the conditions under which the remaining unlabeled trajectories can help in learning a policy with a good performance. In particular, we define an extension to the max-margin inverse reinforcement learning proposed by Abbeel and Ng (2004) where, at each iteration, the max-margin optimization step is replaced by a semi-supervised optimization problem which favors classifiers separating clusters of trajectories. Finally, we report empirical results on two grid-world domains showing that the semi-supervised algorithm is able to output a better policy in fewer iterations than the related algorithm that does not take the unlabeled trajectories into account.},
author = {Valko, Michal and Ghavamzadeh, Mohammad and Lazaric, Alessandro},
booktitle = {Proceedings of the 10th European Workshop on Reinforcement Learning, Journal of Machine Learning Research Workshop and Conference Proceedings},
month = {jun},
pages = {131--241},
publisher = {Sparc},
title = {{Semi-supervised apprenticeship learning}},
url = {http://researchers.lille.inria.fr/{~}valko/hp/serve.php?what=publications/valko2012semi-supervised.pdf},
volume = {24},
year = {2012}
}
@article{gyorgy07sp,
author = {Gy{\"{o}}rgy, Andr{\'{a}}s and Linder, Tam{\'{a}}s and Lugosi, G{\'{a}}bor and Ottucs{\'{a}}k, Gy{\"{o}}rgy},
issn = {1532-4435},
journal = {Journal of Machine Learning Research},
pages = {2369--2403},
publisher = {JMLR.org},
title = {{The On-Line Shortest Path Problem Under Partial Monitoring}},
volume = {8},
year = {2007}
}
@article{best1990active,
author = {Best, M J and Chakravarti, N},
journal = {Mathematical Programming},
number = {1},
pages = {425--439},
publisher = {Springer},
title = {{Active set algorithms for isotonic regression; a unifying framework}},
volume = {47},
year = {1990}
}
@incollection{easley2010networks,
abstract = {Are all film stars linked to Kevin Bacon? Why do the stock markets rise and fall sharply on the strength of a vague rumour? How does gossip spread so quickly? Are we all related through six degrees of separation? There is a growing awareness of the complex networks that pervade modern society. We see them in the rapid growth of the Internet, the ease of global communication, the swift spread of news and information, and in the way epidemics and financial crises develop with startling speed and intensity. This introductory book on the new science of networks takes an interdisciplinary approach, using economics, sociology, computing, information science and applied mathematics to address fundamental questions about the links that connect us, and the ways that our decisions can have consequences for others.},
author = {Easley, David and Kleinberg, Jon},
publisher = {Cambridge University Press},
title = {{Networks, Crowds, and Markets: Reasoning About a Highly Connected World}},
year = {2010}
}
@inproceedings{agosta2013mixture,
abstract = {We model a little studied type of traffic, namely the network traffic generated from endhosts. We introduce a parsimonious model of the marginal distribution for connection arrivals consisting of mixture models with both heavy and light-tailed component distributions. Our methodology assumes that the underlying user data can be fitted to one of several models, and we apply Bayesian model selection criterion to choose the preferred combination of components. Our experiments show that a simple Pareto-exponential mixture model is preferred over more complex alternatives, for a wide range of users. This model has the desirable property of modeling the entire distribution, effectively clustering the traffic into the heavy-tailed as well as the non-heavy-tailed components. Also this method quantifies the wide diversity in the observed endhost traffic.},
author = {Agosta, John Mark and Chandrashekar, Jaideep and Crovella, Mark and Taft, Nina and Ting, Daniel},
booktitle = {IEEE Proceedings of INFOCOM,},
doi = {10.1109/INFCOM.2013.6566768},
issn = {0743-166X},
keywords = {Approximation methods,Bayes methods,Bayesian model selection criterion,Computational modeling,Data models,Educational institutions,Mathematical model,Maximum likelihood estimation,Pareto distribution,connection arrivals,endhost network traffic,heavy-tailed component distributions,light-tailed component distributions,marginal distribution,parsimonious model,simple Pareto-exponential mixture,telecommunication networks,telecommunication traffic,traffic clustering,wide diversity},
pages = {225--229},
title = {{Mixture models of endhost network traffic}},
year = {2013}
}
@inproceedings{bartletthigh,
author = {Bartlett, Peter L and Dani, Varsha and Hayes, Thomas P and Kakade, Sham M and Rakhlin, Alexander and Tewari, Ambuj},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {335--342},
title = {{High-probability Regret Bounds for Bandit Online Linear Optimization}}
}
@incollection{weinberger2006distance,
address = {Cambridge, MA},
annote = {comps{\_}distance},
author = {Weinberger, Kilian and Blitzer, John and Saul, Lawrence},
booktitle = {Advances in Neural Information Processing Systems 18},
editor = {Weiss, Y and Sch{\"{o}}lkopf, B and Platt, J},
pages = {1473--1480},
publisher = {MIT Press},
title = {{Distance Metric Learning for Large Margin Nearest Neighbor Classification}},
url = {http://books.nips.cc/papers/files/nips18/NIPS2005{\_}0265.pdf},
year = {2006}
}
@book{catch-22,
author = {Heller, Joseph},
publisher = {Simon {\&} Schuster},
title = {{Catch-22}},
year = {1961}
}
@inproceedings{krause2005near,
author = {Krause, A and Guestrin, C},
booktitle = {Proceedings of the 21st Conference on Uncertainty in Artificial Intelligence},
title = {{Near-optimal nonmyopic value of information in graphical models}},
year = {2005}
}
@book{DGL96,
author = {Devroye, L and Gy{\"{o}}rfi, L and Lugosi, G},
publisher = {Springer},
title = {{A Probabilistic Theory of Pattern Recognition}},
year = {1996}
}
@article{scholkopf1999estimating,
annote = {comps{\_}ano},
author = {Sch{\"{o}}lkopf, Bernhard and Platt, John C and Shawe-Taylor, John and Smola, Alex J and Williamson, Robert C},
journal = {Neural Computation},
pages = {1443--1471},
title = {{Estimating the support of a high-dimensional distribution}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.60.9423},
volume = {13},
year = {1999}
}
@article{hauskrecht2012outlier,
abstract = {We develop and evaluate a data-driven approach for detecting unusual (anomalous) patient-management decisions using past patient cases stored in electronic health records (EHRs). Our hypothesis is that a patient-management decision that is unusual with respect to past patient care may be due to an error and that it is worthwhile to generate an alert if such a decision is encountered. We evaluate this hypothesis using data obtained from EHRs of 4486 post-cardiac surgical patients and a subset of 222 alerts generated from the data. We base the evaluation on the opinions of a panel of experts. The results of the study support our hypothesis that the outlier-based alerting can lead to promising true alert rates. We observed true alert rates that ranged from 25{\%} to 66{\%} for a variety of patient-management actions, with 66{\%} corresponding to the strongest outliers.},
author = {Hauskrecht, Milos and Batal, Iyad and Valko, Michal and Visweswaran, Shyam and Cooper, Gregory F and Clermont, Gilles},
doi = {10.1016/j.jbi.2012.08.004},
issn = {1532-0464},
journal = {Journal of Biomedical Informatics},
keywords = {Clinical alerting,Conditional outlier detection,Machine learning,Medical errors},
month = feb,
number = {1},
pages = {47--55},
title = {{Outlier detection for patient monitoring and alerting}},
url = {http://www.sciencedirect.com/science/article/pii/S1532046412001281},
volume = {46},
year = {2013}
}
@inproceedings{kearns1999sparse,
abstract = {A critical issue for the application of Markov decision processes (MDPs) to realistic problems is how the complexity of planning scales with the size of the MDP. In stochastic environments with very large or infinite state spaces, traditional planning and reinforcement learning algorithms may be inapplicable, since their running time typically grows linearly with the state space size in the worst case. In this paper we present a new algorithm that, given only a generative model (a natural and common type of simulator) for an arbitrary MDP, performs on-line, near-optimal planning with a per-state running time that has no dependence on the number of states. The running time is exponential in the horizon time (which depends only on the discount factor $\gamma$ and the desired degree of approximation to the optimal policy). Our algorithm thus provides a different complexity trade-off than classical algorithms such as value iteration—rather than scaling linearly in both horizon time and state space size, our running time trades an exponential dependence on the former in exchange for no dependence on the latter. Our algorithm is based on the idea of sparse sampling. We prove that a randomly sampled look-ahead tree that covers only a vanishing fraction of the full look-ahead tree nevertheless suffices to compute near-optimal actions from any state of an MDP. Practical implementations of the algorithm are discussed, and we draw ties to our related recent results on finding a near-best strategy from a given class of strategies in very large partially observable MDPs.},
author = {Kearns, Michael and Mansour, Yishay and Ng, Andrew Y.},
booktitle = {Proceedings of the 16th International Joint Conference on Artificial Intelligence},
title = {{A sparse sampling algorithm for near-optimal planning in large Markov decision processes}},
year = {1999}
}
@article{ShMe99,
author = {Shamir, G I and Merhav, N},
journal = {IEEE Transactions on Information Theory},
pages = {1498--1519},
title = {{Low-complexity sequential lossless coding for piecewise-stationary memoryless sources}},
volume = {IT-45},
year = {1999}
}
@inproceedings{kveton2005mcmc,
author = {Kveton, Branislav and Hauskrecht, Milos},
booktitle = {Proceedings of the 19th International Joint Conference on Artificial Intelligence},
pages = {1346--1351},
title = {{An {MCMC} Approach to Solving Hybrid Factored {MDPs}}},
year = {2005}
}
@article{carpentier2014asimple,
archivePrefix = {arXiv},
arxivId = {1505.04627},
author = {Carpentier, Alexandra and Valko, Michal},
eprint = {1505.04627},
journal = {arXiv preprint arXiv:1505.04627},
title = {{Simple regret for infinitely many armed bandits}},
year = {2015}
}
@article{titsias2009efficient,
abstract = {Sampling functions in Gaussian process (GP) models is challenging because of the highly correlated posterior distribution. We describe an efficient Markov chain Monte Carlo algorithm for sampling from the posterior process of the GP model. This algorithm uses control variables which are auxiliary function values that provide a low dimensional representation of the function. At each iteration, the algorithm proposes new values for the control variables and generates the function from the conditional GP prior. The control variable input locations are found by continuously minimizing an objective function. We demonstrate the algorithm on regression and classification problems and we use it to estimate the parameters of a differential equation model of gene regulation.},
author = {Titsias, Michalis K and Lawrence, Neil D and Rattray, Magnus},
editor = {Koller, D and Schuurmans, D and Bengio, Y and Bottou, L},
journal = {Advances in Neural Information Processing Systems 21},
pages = {1681--1688},
publisher = {Citeseer},
title = {{Efficient Sampling for Gaussian Process Inference using Control Variables}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.159.74{\&}rep=rep1{\&}type=pdf},
volume = {21},
year = {2009}
}
@incollection{scholkopf1999kernel,
author = {Sch{\"{o}}lkopf, Bernhard and Smola, Alexander J. and M{\"{u}}ller, Klaus-Robert},
booktitle = {Advances in kernel methods},
pages = {327--352},
publisher = {MIT Press},
address = {Cambridge, MA, USA},
title = {{Kernel principal component analysis}},
year = {1999}
}
@article{CBMS07,
author = {Cesa-Bianchi, Nicol{\`{o}} and Mansour, Yishay and Stoltz, Gilles},
journal = {Machine Learning},
number = {2-3},
pages = {321--352},
title = {{Improved second-order bounds for prediction with expert advice}},
volume = {66},
year = {2007}
}
@article{Sanchez-2003-Mislabeled,
author = {Sanchez, J S and Barandela, R and Marques, A I and Alejo, R and Badenas, J},
journal = {Pattern Recognition Letters},
volume = {24},
pages = {1015--1022},
title = {{Analysis of New Techniques to Obtain Quality Training Sets}},
year = {2003}
}
@book{haykin1994neural,
address = {Upper Saddle River, NJ, USA},
author = {Haykin, Simon},
edition = {1st},
isbn = {0023527617},
publisher = {Prentice Hall PTR},
title = {{Neural Networks: A Comprehensive Foundation}},
year = {1994}
}
@incollection{Bal97,
author = {Ball, K},
booktitle = {Flavors of Geometry},
editor = {Levy, S},
pages = {1--58},
publisher = {Cambridge University Press},
title = {{An Elementary Introduction to Modern Convex Geometry}},
year = {1997}
}
@article{dani2008price,
author = {Dani, Varsha and Hayes, Thomas and Kakade, Sham},
editor = {Platt, J C and Koller, D and Singer, Y and Roweis, S},
issn = {00368075},
journal = {Advances in Neural Information Processing Systems 20},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {1--8},
publisher = {MIT Press},
title = {{The Price of Bandit Information for Online Optimization}},
url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.71.4607{\&}rep=rep1{\&}type=pdf},
volume = {20},
year = {2008}
}
@inproceedings{hauskrecht2004linear,
author = {Hauskrecht, Milos and Kveton, Branislav},
booktitle = {Proceedings of the 14th International Conference on Automated Planning and Scheduling},
pages = {306--314},
title = {{Heuristic Refinements of Approximate Linear Programming for Factored Continuous-State {Markov} Decision Processes}},
year = {2004}
}
@techreport{golding1996idleness,
author = {Golding, Richard and Bosch, Peter and Wilkes, John},
institution = {Hewlett-Packard Laboratories},
number = {HPL-96-140},
title = {{Idleness Is Not Sloth}},
year = {1996}
}
@article{kolmogorov2004energy,
author = {Kolmogorov, V and Zabih, R},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
number = {2},
pages = {147--159},
publisher = {Published by the IEEE Computer Society},
title = {{What energy functions can be minimized via graph cuts?}},
volume = {26},
year = {2004}
}
@inproceedings{ziebart2008maximum,
abstract = {Recent research has shown the benefit of framing problems of imitation learning as solutions to Markov Decision Problems. This approach reduces the problem of learning to recovering a utility function that makes the behavior induced by a near-optimal policy closely mimic demonstrated behavior. In this work, we develop a probabilistic approach based on the principle of maximum entropy. Our approach provides a well-defined, globally normalized distribution over decisions, while providing the same performance guarantees as existing methods.We develop our technique in the context of modeling real-world navigation and driving behaviors where collected data is inherently noisy and imperfect. Our probabilistic approach enables modeling of route preferences as well as a powerful new approach to inferring destinations and routes based on partial trajectories.},
author = {Ziebart, Brian and Maas, Andrew L and Bagnell, J Andrew and Dey, Anind K},
booktitle = {Proceedings of the 23rd AAAI Conference on Artificial Intelligence},
editor = {Archer, M},
file = {:Users/miki/Library/Application Support/Mendeley Desktop/Downloaded/Ziebart et al. - 2008 - Maximum Entropy Inverse Reinforcement Learning.pdf:pdf},
isbn = {9781577353683},
keywords = {irl},
mendeley-tags = {irl},
pages = {1433--1438},
publisher = {AAAI Press},
title = {{Maximum Entropy Inverse Reinforcement Learning}},
url = {http://www.aaai.org/Papers/AAAI/2008/AAAI08-227.pdf},
year = {2008}
}
@inproceedings{bartlettadaptive,
author = {Bartlett, Peter L and Hazan, Elad and Rakhlin, Alexander},
keywords = {bandits},
mendeley-tags = {bandits},
pages = {65--72},
title = {{Adaptive Online Gradient Descent.}}
}
@techreport{trick1993linear,
author = {Trick, Michael and Zin, Stanley},
institution = {Carnegie Mellon University},
title = {{A Linear Programming Approach to Solving Stochastic Dynamic Programs}},
year = {1993}
}
@inproceedings{zhang1995reinforcement,
author = {Zhang, Wei and Dietterich, Thomas},
booktitle = {Proceedings of the 14th International Joint Conference on Artificial Intelligence},
pages = {1114--1120},
title = {{A Reinforcement Learning Approach to Job-Shop Scheduling}},
year = {1995}
}
@article{subramanya2014graph,
author = {Subramanya, Amarnag and Talukdar, Partha Pratim},
journal = {Synthesis Lectures on Artificial Intelligence and Machine Learning},
number = {4},
pages = {1--125},
publisher = {Morgan {\&} Claypool Publishers},
title = {{Graph-Based Semi-Supervised Learning}},
volume = {8},
year = {2014}
}
@techreport{zhu2003semi-superviseda,
author = {Zhu, Xiaojin and Ghahramani, Zoubin and Lafferty, John},
booktitle = {Proceedings of the 20th International Conference on Machine Learning},
institution = {School of CS, CMU},
pages = {912--919},
title = {{Semi-Supervised Learning: From Gaussian Fields to Gaussian Processes}},
year = {2003}
}
@book{mccullagh1989generalized,
address = {London},
author = {McCullagh, P and Nelder, J A},
edition = {2nd},
keywords = {asymptotics,glm,logit,probit,social{\_}science{\_}statistics},
publisher = {Chapman and Hall},
title = {{Generalized Linear Models}},
year = {1989}
}
@article{Baraniuk2008,
author = {Baraniuk, R G and Cevher, V and Duarte, M F and Hegde, C},
journal = {IEEE Transactions on Information Theory},
pages = {1982--2001},
title = {{Model-based compressive sensing}},
volume = {56},
year = {2010}
}
@article{fisher1928limiting,
abstract = {The limiting distribution, when n is large, of the greatest or least of a sample of n, must satisfy a functional equation which limits its form to one of two main types. Of these one has, apart from size and position, a single parameter h, while the other is the limit to which it tends when h tends to zero. The appropriate limiting distribution in any case may be found from the manner in which the probability of exceeding any value x tends to zero as x is increased. For the normal distribution the limiting distribution has h = 0. From the normal distribution the limiting distribution is approached with extreme slowness; the final series of forms passed through as the ultimate form is approached may be represented by the series of limiting distributions in which h tends to zero in a definite manner as n increases to infinity. Numerical values are given for the comparison of the actual with the penultimate distributions for samples of 60 to 1000, and of the penultimate with the ultimate distributions for larger samples.},
author = {Fisher, Ronald Aylmer and Tippett, Leonard Henry Caleb},
doi = {10.1017/S0305004100015681},
isbn = {1469-8064},
issn = {0305-0041},
journal = {Mathematical Proceedings of the Cambridge Philosophical Society},
pages = {180--190},
title = {{Limiting forms of the frequency distribution of the largest or smallest member of a sample}},
volume = {24},
year = {1928}
}
@article{nino-nora2010computing,
author = {Ni{\~{n}}o-Mora, J},
doi = {10.1287/ijoc.1100.0398},
issn = {10919856},
journal = {INFORMS Journal on Computing},
keywords = {accepted may 2010,accepted winfried grassmann,advance,analysis algorithms,area editor computational,bandits,computational complexity,dynamic programming,finite horizon,history,index policies,march 2009,markov,may 2010,probability analysis,published online articles,received,revised january 2010},
number = {2},
pages = {254--267},
title = {{Computing a Classic Index for Finite-Horizon Bandits}},
url = {http://joc.journal.informs.org/cgi/doi/10.1287/ijoc.1100.0398},
volume = {23},
year = {2010}
}
@inproceedings{park2002map,
author = {Park, James},
booktitle = {Proceedings of the 18th Conference on Uncertainty in Artificial Intelligence},
pages = {388--396},
title = {{{MAP} Complexity Results and Approximation Methods}},
year = {2002}
}
@article{yu,
author = {Yu, B},
journal = {The Annals of Probability},
number = {1},
pages = {94--116},
title = {{Rates of convergence for empirical processes of stationary mixing sequences}},
volume = {22},
year = {1994}
}
@inproceedings{xing2003distance,
annote = {comps{\_}distance},
author = {Xing, Eric P and Ng, Andrew Y and Jordan, Michael I and Russell, Stuart},
booktitle = {Advances in Neural Information Processing Systems 15},
pages = {505--512},
publisher = {MIT Press},
title = {{Distance metric learning, with application to clustering with side-information}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.3667},
year = {2003}
}
@article{donoho,
author = {Donoho, D L and Johnstone, I M},
journal = {Journal of the American Statistical Association},
number = {432},
pages = {1200--1224},
publisher = {American Statistical Association},
title = {{Adapting to Unknown Smoothness Via Wavelet Shrinkage}},
volume = {90},
year = {1995}
}
@inproceedings{hoey1999spudd:,
author = {Hoey, Jesse and St-Aubin, Robert and Hu, Alan and Boutilier, Craig},
booktitle = {Proceedings of the 15th Conference on Uncertainty in Artificial Intelligence},
pages = {279--288},
title = {{{SPUDD}: Stochastic Planning using Decision Diagrams}},
year = {1999}
}
@article{crammer2002algorithmic,
address = {Cambridge, MA, USA},
author = {Crammer, Koby and Singer, Yoram},
issn = {1533-7928},
journal = {Journal of Machine Learning Research},
pages = {265--292},
publisher = {MIT Press},
title = {{On the algorithmic implementation of multiclass kernel-based vector machines}},
volume = {2},
year = {2002}
}