@techreport{perrault2019finding,
abstract = {We consider the problem where an agent wants to find a hidden object that is randomly located in some vertex of a directed acyclic graph (DAG) according to a fixed but possibly unknown distribution. The agent can only examine vertices whose in-neighbors have already been examined. In scheduling theory, this problem is denoted by 1|prec|∑wjCj [Graham1979]. However, in this paper we address a learning setting where we allow the agent to stop before having found the object and restart searching on a new independent instance of the same problem. The goal is to maximize the total number of hidden objects found under a time constraint. The agent can thus skip an instance after realizing that it would spend too much time on it. Our contributions are both to the search theory and multi-armed bandits. If the distribution is known, we provide a quasi-optimal greedy strategy with the help of known computationally efficient algorithms for solving 1|prec|∑wjCj under some assumption on the DAG. If the distribution is unknown, we show how to sequentially learn it and, at the same time, act near-optimally in order to collect as many hidden objects as possible. We provide an algorithm, prove theoretical guarantees, and empirically show that it outperforms the naive baseline.},
author = {Perrault, Pierre and Perchet, Vianney and Valko, Michal},
title = {{Finding the bandit in a graph: Sequential search-and-stop}},
year = {2019}
}