Commit 9f2fd4c9 authored by Valentin Antuori's avatar Valentin Antuori
Browse files

mcts works

parent 935802d3
48
8000
0 0 0 PF 2200 14 40 0 2000 5
1 0 0 DF 2200 7 40 0 2000 2
2 0 0 PE 2200 7 15 0 2000 2
3 0 0 DE 2200 14 15 0 2000 5
4 0 1 PF 2200 14 40 2000 4000 5
5 0 1 DF 2200 7 40 2000 4000 2
6 0 1 PE 2200 7 15 2000 4000 2
7 0 1 DE 2200 14 15 2000 4000 5
8 1 0 PF 1500 1 40 0 2120 2
9 1 0 DF 1500 14 40 0 2120 5
10 1 0 PE 1500 14 15 0 2120 5
11 1 0 DE 1500 1 15 0 2120 2
12 1 1 PF 1500 1 40 2120 4240 2
13 1 1 DF 1500 14 40 2120 4240 5
14 1 1 PE 1500 14 15 2120 4240 5
15 1 1 DE 1500 1 15 2120 4240 2
16 2 0 PF 1800 1 40 0 1840 4
17 2 0 DF 1800 6 40 0 1840 2
18 2 0 PE 1800 6 15 0 1840 2
19 2 0 DE 1800 1 15 0 1840 4
20 2 1 PF 1800 1 40 1840 3680 4
21 2 1 DF 1800 6 40 1840 3680 2
22 2 1 PE 1800 6 15 1840 3680 2
23 2 1 DE 1800 1 15 1840 3680 4
24 3 0 PF 2000 1 40 0 1650 3
25 3 0 DF 2000 6 40 0 1650 2
26 3 0 PE 2000 6 15 0 1650 2
27 3 0 DE 2000 1 15 0 1650 3
28 3 1 PF 2000 1 40 1650 3300 3
29 3 1 DF 2000 6 40 1650 3300 2
30 3 1 PE 2000 6 15 1650 3300 2
31 3 1 DE 2000 1 15 1650 3300 3
32 4 0 PF 2400 16 40 0 1200 3
33 4 0 DF 2400 18 40 0 1200 4
34 4 0 PE 2400 18 15 0 1200 4
35 4 0 DE 2400 16 15 0 1200 3
36 4 1 PF 2400 16 40 1200 2400 3
37 4 1 DF 2400 18 40 1200 2400 4
38 4 1 PE 2400 18 15 1200 2400 4
39 4 1 DE 2400 16 15 1200 2400 3
40 5 0 PF 1900 1 40 0 2650 2
41 5 0 DF 1900 16 40 0 2650 3
42 5 0 PE 1900 16 15 0 2650 3
43 5 0 DE 1900 1 15 0 2650 2
44 5 1 PF 1900 1 40 2650 5300 2
45 5 1 DF 1900 16 40 2650 5300 3
46 5 1 PE 1900 16 15 2650 5300 3
47 5 1 DE 1900 1 15 2650 5300 2
0 0 0 PF 2200 13 40 0 2000 5
1 0 0 DF 2200 6 40 0 2000 2
2 0 0 PE 2200 6 15 0 2000 2
3 0 0 DE 2200 13 15 0 2000 5
4 0 1 PF 2200 13 40 2000 4000 5
5 0 1 DF 2200 6 40 2000 4000 2
6 0 1 PE 2200 6 15 2000 4000 2
7 0 1 DE 2200 13 15 2000 4000 5
8 1 0 PF 1500 0 40 0 2120 2
9 1 0 DF 1500 13 40 0 2120 5
10 1 0 PE 1500 13 15 0 2120 5
11 1 0 DE 1500 0 15 0 2120 2
12 1 1 PF 1500 0 40 2120 4240 2
13 1 1 DF 1500 13 40 2120 4240 5
14 1 1 PE 1500 13 15 2120 4240 5
15 1 1 DE 1500 0 15 2120 4240 2
16 2 0 PF 1800 0 40 0 1840 4
17 2 0 DF 1800 5 40 0 1840 2
18 2 0 PE 1800 5 15 0 1840 2
19 2 0 DE 1800 0 15 0 1840 4
20 2 1 PF 1800 0 40 1840 3680 4
21 2 1 DF 1800 5 40 1840 3680 2
22 2 1 PE 1800 5 15 1840 3680 2
23 2 1 DE 1800 0 15 1840 3680 4
24 3 0 PF 2000 0 40 0 1650 3
25 3 0 DF 2000 5 40 0 1650 2
26 3 0 PE 2000 5 15 0 1650 2
27 3 0 DE 2000 0 15 0 1650 3
28 3 1 PF 2000 0 40 1650 3300 3
29 3 1 DF 2000 5 40 1650 3300 2
30 3 1 PE 2000 5 15 1650 3300 2
31 3 1 DE 2000 0 15 1650 3300 3
32 4 0 PF 2400 15 40 0 1200 3
33 4 0 DF 2400 17 40 0 1200 4
34 4 0 PE 2400 17 15 0 1200 4
35 4 0 DE 2400 15 15 0 1200 3
36 4 1 PF 2400 15 40 1200 2400 3
37 4 1 DF 2400 17 40 1200 2400 4
38 4 1 PE 2400 17 15 1200 2400 4
39 4 1 DE 2400 15 15 1200 2400 3
40 5 0 PF 1900 0 40 0 2650 2
41 5 0 DF 1900 15 40 0 2650 3
42 5 0 PE 1900 15 15 0 2650 3
43 5 0 DE 1900 0 15 0 2650 2
44 5 1 PF 1900 0 40 2650 5300 2
45 5 1 DF 1900 15 40 2650 5300 3
46 5 1 PE 1900 15 15 2650 5300 3
47 5 1 DE 1900 0 15 2650 5300 2
19
0 78 38 72 97 142 137 114 0 173 176 161 180 128 128 140 170 170 302
......
......@@ -5,6 +5,12 @@
using namespace std;
// Temperature used by the get_distribution overload that takes no explicit temperature.
double default_temperature{0.1};

/**
 * Overwrite the global default temperature.
 *
 * @param temperature new value stored in default_temperature
 */
void set_default_temperature(double temperature)
{
    ::default_temperature = temperature;
}
unsigned int choice(vector<double> proba)
{
double r = (double)rand() / RAND_MAX;
......@@ -26,6 +32,38 @@ unsigned int choice(vector<double> proba)
throw string("Invalid probability vector");
}
/**
 * Build a solution by picking every action uniformly at random.
 *
 * A Solution and a State are constructed from `data`; then, until the
 * solution holds data.nb_tasks actions, the candidate actions of the state
 * are fetched, one of them is drawn with rand(), appended to the solution
 * and applied to the state.
 *
 * @param data instance to sample a solution for
 * @return the sampled solution
 * @throws std::string if the state offers no candidate action before the
 *         solution is complete
 */
std::unique_ptr<Solution> random_sample(const Instance& data)
{
    unique_ptr<Solution> sol = make_unique<Solution>(data);
    State s(data);
    for(int i = sol->size(); i < data.nb_tasks; ++i)
    {
        const vector<int> actions = s.get_actions();
        // rand() % 0 is undefined behaviour: fail explicitly instead of
        // computing a remainder by zero when nothing can be scheduled.
        if(actions.empty())
        {
            throw string("No action available");
        }
        const int action = actions[rand() % actions.size()];
        sol->append(action);
        s.update(action);
    }
    return sol;
}
unique_ptr<Solution> build(const Instance& data)
{
unique_ptr<Solution> sol = make_unique<Solution>(data);
......@@ -42,14 +80,14 @@ void build(const Instance& data, Solution& sol, State& s)
// std::cerr << "step "<< i << std::endl;
vector<int> actions = s.get_actions();
// std::cerr << "Candidates : ";
// for(auto a : actions){
// std::cerr << a << " ";
// }
// std::cerr << std::endl;
// std::cerr << "Candidates : " << std::endl;
// compute proba;
vector<double> proba = get_distribution(data, s, actions);
// for(size_t i = 0; i < proba.size(); ++i){
// std::cerr << actions[i] << " : " << proba[i] << std::endl;
// }
int idx = choice(proba);
......@@ -63,7 +101,14 @@ void build(const Instance& data, Solution& sol, State& s)
}
// std::cerr << "End Build" << std::endl;
}
vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions)
{
return get_distribution(data, s, actions, default_temperature);
}
vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions, const double temperature)
{
double sum_exp_fitness = 0;
vector<double> exp_fitness(actions.size());
......@@ -74,11 +119,12 @@ vector<double> get_distribution(const Instance& data, const State& s, const std:
for(size_t i = 0; i < actions.size(); ++i)
{
//compute fitness for action i
// std::cerr << actions[i] << " : " ;
double fitness = fitness_func(data, s, actions[i]);
// std::cerr << actions[i] << " : " << fitness << std::endl;
// std::cerr << "Fitness="<<fitness << std::endl;
exp_fitness[i] = exp(fitness);
exp_fitness[i] = exp(fitness / temperature);
sum_exp_fitness += exp_fitness[i];
}
......@@ -94,11 +140,21 @@ vector<double> get_distribution(const Instance& data, const State& s, const std:
double fitness_func(const Instance& data, const State& s, int action)
{
// distance return 0 if last_task < 0
double lambda1 = (double)(s.lst(action) - max(data.release_date(action), s.date - s.tardiness + data.distance(s.last_action, action))) / data.max_slack;
double lambda2 = (double)(max(data.release_date(action) - (s.date - s.tardiness), data.distance(s.last_action, action))) / data.max_dist;
int dist = data.distance(s.last_action, action);
if(s.last_action < 0)
{
dist = 0;
}
double lambda1 = (double)(s.lst(action) - max(data.release_date(action), s.date - s.tardiness + dist)) / data.max_slack;
double lambda2 = (double)(max(data.release_date(action) - (s.date - s.tardiness), dist)) / data.max_dist;
double lambda3 = 1 - (double)data.trolley_length(action) / data.T_max;
double lambda4 = data.is_pickup(action) ? 1 : 0;
// std::cerr << "lst=" << s.lst(action) << ", release=" << data.release_date(action) << ",date=" << s.date << ", tard="<< s.tardiness << std::endl;
// std::cerr << lambda1 << " / " << lambda2 << " / " << lambda3 << " / " << lambda4 << std::endl;
return 1 - (THETA_1 * lambda1 + THETA_2 * lambda2 + THETA_3 * lambda3 + THETA_4 * lambda4);
......
......@@ -11,6 +11,9 @@
#define THETA_3 0.148330024098084
#define THETA_4 0.023875557385973
extern double default_temperature;
void set_default_temperature(double temperature);
/**
* Build solution from scratch
*/
......@@ -19,11 +22,20 @@ std::unique_ptr<Solution> build(const Instance& data);
* Build solution from partial solution, and state
*/
void build(const Instance& data, Solution& sol, State& state);
/**
* Return distribution over action
* Perform a random sample
*/
std::unique_ptr<Solution> random_sample(const Instance& data);
/**
* With default temperature
*/
std::vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions);
/**
* Return distribution over action
*/
std::vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions, const double temperature);
/**
* return the index of the chosen action, in {0, 1, ..., |proba| - 1}
*/
unsigned int choice(std::vector<double> proba);
......
......@@ -23,6 +23,45 @@ namespace fs = std::experimental::filesystem;
using namespace std;
/**
 * Sample `nb_iter` solutions with build() at the given temperature and print
 * summary statistics on stdout.
 *
 * For every sampled solution the function records its lmax() and its
 * "depth": the 1-based position of the first task with positive tardiness.
 * It then prints the average lmax, the average depth, the smallest lmax and
 * the largest depth.
 *
 * Side effect: the global default temperature is set to `temperature` and is
 * not restored afterwards.
 *
 * @param temperature temperature installed through set_default_temperature()
 * @param data        instance passed to build()
 * @param nb_iter     number of solutions to sample
 */
void greedy_sample_stat(double temperature, Instance& data, int nb_iter)
{
    std::cout << "Greedy sample t=" << temperature << std::endl;
    set_default_temperature(temperature);

    long lmax_sum = 0;
    long depth_sum = 0;
    int best_obj = INT_MAX;
    int best_depth = INT_MIN;

    for(int i = 0; i < nb_iter; ++i){
        std::unique_ptr<Solution> sol = build(data);

        lmax_sum += sol->lmax();
        if(sol->lmax() < best_obj)
        {
            best_obj = sol->lmax();
        }

        // `depth` used to be left uninitialised when no task was tardy, so an
        // indeterminate value was added to depth_sum and compared against
        // best_depth. A solution without tardiness is counted as reaching the
        // end of the sequence.
        // NOTE(review): confirm that the full length is the intended depth
        // for a solution with no tardy task.
        int depth = sol->size();
        if(sol->lmax() > 0){
            for(int j = 0; j < sol->size(); ++j){
                if(sol->tardiness(j) > 0){
                    depth = j+1;
                    break;
                }
            }
        }

        depth_sum += depth;
        if(depth > best_depth)
        {
            best_depth = depth;
        }
    }

    std::cout << nb_iter << std::endl << "- lmax = " << (double)lmax_sum/nb_iter << std::endl;
    std::cout << "- depth = " << (double)depth_sum/nb_iter << std::endl;
    std::cout << "- Best = " << best_obj << std::endl;
    std::cout << "- Best depth = " << best_depth << std::endl;
}
// int Node::count = 0;
int main(int argc, char **argv){
ChariotOptions opt = parse(argc, argv);
......@@ -35,6 +74,9 @@ int main(int argc, char **argv){
vector<Instance> data;
set_default_temperature(opt.temperature);
/* instances(s) reading */
struct stat s;
if( stat(opt.instance_file.c_str(),&s) == 0 )
{
......@@ -58,51 +100,7 @@ int main(int argc, char **argv){
cerr << "No file or directory found : " << opt.instance_file << endl;
}
// Instance inst(opt.instance_file);
/*
vector<int> seq{
178, 179, 176, 177, 44, 45, 46, 47, 0, 1, 2, 134, 316, 228, 3,
229, 88, 135, 132, 317, 89, 90, 133, 444, 348, 91, 470, 445, 446, 349,
318, 319, 447, 471, 468, 398, 374, 399, 284, 375, 372, 469, 230, 285, 286,
231, 262, 287, 263, 260, 396, 373, 397, 350, 136, 261, 420, 421, 422, 351,
137, 138, 423, 50, 51, 48, 49, 182, 183, 180, 181, 4, 5, 6, 7,
139, 92, 93, 94, 95, 234, 290, 184, 185, 186, 187, 235, 232, 320, 321,
291, 288, 322, 233, 8, 289, 9, 10, 54, 55, 52, 53, 11, 96, 140,
97, 98, 323, 141, 142, 448, 143, 99, 474, 449, 450, 354, 475, 472, 355,
451, 352, 266, 473, 376, 353, 267, 264, 400, 401, 402, 377, 378, 403, 379,
424, 425, 426, 427, 265, 190, 191, 188, 189, 56, 57, 58, 59, 12, 13,
14, 146, 324, 15, 147, 144, 100, 325, 326, 101, 102, 145, 327, 236, 294,
237, 238, 103, 192, 193, 194, 195, 270, 239, 295, 292, 271, 268, 404, 405,
406, 407, 382, 383, 148, 293, 16, 17, 18, 149, 150, 269, 19, 60, 61,
62, 63, 151, 104, 478, 105, 106, 479, 476, 356, 357, 358, 477, 107, 454,
196, 197, 198, 199, 428, 429, 430, 455, 359, 452, 431, 380, 381, 296, 330,
453, 242, 297, 298, 243, 22, 23, 20, 21, 240, 331, 154, 241, 64, 65,
66, 67, 155, 152, 299, 110, 328, 153, 329, 111, 108, 202, 203, 200, 201,
109, 410, 386, 272, 411, 408, 409, 480, 387, 362, 481, 482, 273, 274, 363,
158, 483, 456, 275, 300, 159, 156, 457, 458, 301, 24, 25, 26, 157, 459,
360, 434, 435, 70, 71, 68, 69, 432, 27, 204, 205, 206, 207, 246, 361,
384, 385, 114, 433, 247, 244, 302, 245, 115, 112, 334, 303, 113, 335, 332,
162, 333, 163, 160, 72, 73, 74, 75, 28, 208, 209, 210, 211, 29, 30,
161, 31, 306, 278, 248, 249, 250, 251, 364, 279, 276, 307, 304, 365, 338,
339, 336, 277, 305, 32, 76, 77, 78, 79, 486, 337, 366, 487, 484, 367,
460, 33, 34, 35, 461, 462, 485, 116, 117, 118, 390, 391, 388, 119, 164,
389, 414, 415, 412, 413, 463, 165, 166, 438, 439, 436, 437, 214, 215, 212,
213, 167, 120, 168, 121, 122, 82, 83, 80, 81, 123, 36, 216, 217, 218,
219, 37, 38, 169, 170, 464, 252, 39, 253, 171, 342, 308, 343, 340, 341,
370, 309, 310, 465, 466, 311, 280, 467, 371, 368, 442, 443, 440, 281, 282,
441, 174, 254, 255, 369, 392, 126, 393, 394, 283, 416, 417, 418, 419, 395,
127, 490, 124, 175, 172, 125, 491, 488, 173, 42, 43, 40, 222, 223, 220,
221, 489, 84, 85, 86, 87, 41, 256, 344, 130, 345, 346, 131, 128, 257,
258, 129, 347, 259, 314, 226, 227, 224, 225, 315, 312, 313};
Solution sol(inst);
for (auto t : seq) {
sol.append(t);
}
*/
srand (opt.seed);
if(opt.learn)
{
......@@ -135,50 +133,58 @@ int main(int argc, char **argv){
}
else if(opt.mcts)
{
srand (opt.seed);
// {
// Node root = Node(0, 0);
// list<Node*> node_list{&root};
// int count = 0;
// while(!node_list.empty()){
// Node* n = node_list.front();
// node_list.pop_front();
// int nb_child = rand() % 15 + 1;
// if(count < opt.seed){
// for(int i = 0; i < nb_child; ++i){
// Node* tmp = n->add_child(0, 0);
// node_list.push_back(tmp);
// count++;
// }
// }
// }
// }
// std::cout << " --- " << std::endl;
// {
// Node root = Node(0, 0);
// list<Node*> node_list{&root};
// int count = 0;
// while(!node_list.empty()){
// Node* n = node_list.front();
// node_list.pop_front();
// int nb_child = rand() % 15 + 1;
// if(count < opt.seed){
// for(int i = 0; i < nb_child; ++i){
// Node* tmp = n->add_child(0, 0);
// node_list.push_back(tmp);
// count++;
// }
// }
// }
// }
std::unique_ptr<Solution> sol = solve(data[0], 0.5);
std::unique_ptr<Solution> sol = solve(data[0], opt.c);
}
else if(opt.heuristic)
{
std::unique_ptr<Solution> best_sol = build(data[0]);
std::cout << best_sol->lmax() << std::endl;
}
else{
int NB_ITER = 1000;
{
std::cout << "Random sample" << std::endl;
long lmax_sum = 0;
long depth_sum = 0;
int best_obj = INT_MAX;
int best_depth = INT_MIN;
for(int i = 0; i < NB_ITER; ++i){
std::unique_ptr<Solution> sol = random_sample(data[0]);
lmax_sum += sol->lmax();
if(sol->lmax() < best_obj)
{
best_obj = sol->lmax();
}
int depth;
if(sol->lmax() > 0){
for(int j = 0; j < sol->size(); ++j){
if(sol->tardiness(j) > 0){
depth = j+1;
break;
}
}
}
depth_sum += depth;
if(depth > best_depth)
{
best_depth = depth;
}
}
std::cout << NB_ITER << std::endl << "- lmax = " << (double)lmax_sum/NB_ITER << std::endl;
std::cout << "- depth = " << (double)depth_sum/NB_ITER << std::endl;
std::cout << "- Best = " << best_obj << std::endl;
std::cout << "- Best depth = " << best_depth << std::endl;
}
greedy_sample_stat(1, data[0], NB_ITER);
greedy_sample_stat(0.1, data[0], NB_ITER);
greedy_sample_stat(0.01, data[0], NB_ITER);
}
std::cout << "Fin" << std::endl;
// for(int i = 0; i < 10000000000000; ++i ){
......
......@@ -10,7 +10,11 @@
#define DEBUG true
#define DEBUG true
#define EXPAND_TEMPERATURE 0.1
#define MAX_ITER 10000
#define NB_SAMPLE 100
/**
Main function, run the algorithm on the instance in data
......@@ -28,11 +32,16 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
// create root node and all its children
Node root(0, 0, -1, 0, nullptr);
// get mean and std dev from root
auto sol = Solution(data);
State s(data);
//best_sol = samples(root, sol, s, std::move(best_sol));
// Add the first probe at the root node (in order to have N = 1 for it)
//root.update(root.mean);
int iter_count = 0;
while(best_sol->lmax() > 0)
while(best_sol->lmax() > 0 && iter_count < MAX_ITER)
{
iter_count++;
......@@ -44,21 +53,35 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
/////////////////////////////
///////// SELECTION /////////
/////////////////////////////
if(DEBUG)
std::cerr << "-- Selection --" << std::endl;
Node* current_node = &root;
while(!current_node->is_leaf())
{
Node* best = nullptr;
double max_ucb = -std::numeric_limits<float>::infinity();
// compute ucb for each children and keep the best one
double sqrt_n = sqrt((double)current_node->N);
std::cerr << "mean=" << current_node->mean << ", std=" << current_node->std_dev << ", N=" << current_node->N << ", sqrt_n=" << sqrt_n << std::endl;
for(auto child : current_node->children){
double ucb = child->Q + c * child->P * sqrt(current_node->N) / 1 + child->N;
double Q = -(child->avg_W - current_node->mean) / current_node->std_dev;
if(child->N < 1)
Q = 0.0;
double ucb = Q + c * child->P * sqrt_n / (1 + child->N);
if(DEBUG)
{
std::cerr << child->action << " : " << ucb << " ( Q=" << Q << ", P=" << child->P << ", N=" << child->N << " )" << std::endl;
std::cerr << "avg=" << child->avg_W << ", " << " - " << std::endl;
}
if(ucb > max_ucb){
max_ucb = ucb;
best = child;
}
}
if(DEBUG)
{
std::cerr << " --- " << std::endl;
}
// Update state
s.update(best->action);
sol->append(best->action);
......@@ -66,19 +89,18 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
}
std::cerr << "Partial Sol : " << *sol << std::endl;
////////////////////////////
///////// EXPAND /////////
////////////////////////////
if(DEBUG)
std::cerr << "-- Expand --" << std::endl;
std::cerr << "-- Expansion --" << std::endl;
// TODO : Don't expand if node implies tardiness
std::vector<int> actions = s.get_actions();
std::vector<double> action_distribution = get_distribution(data, s, actions);
std::vector<double> action_distribution = get_distribution(data, s, actions, EXPAND_TEMPERATURE);
bool delete_current_node = false;
for(size_t i = 0; i < actions.size(); ++i){
int end_date = current_node->end_date + data.distance(current_node->action, actions[i]) + data.duration(actions[i]);
if(end_date > data.due_date(actions[i])){
if(end_date > data.due_date(actions[i])){ // Don't expand if a children gets tardiness
delete_current_node = true;
break;
}
......@@ -90,53 +112,138 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
delete current_node;
}else{
std::cerr << "-- Simulation --" << std::endl;
///////////////////////////////
////////// SIMULATION /////////
///////////////////////////////
if(DEBUG)
std::cerr << "-- Simulation --" << std::endl;
/* //--- Classical simulation ---- /
//choose the next randomly here, as we already get the distribution
int action = choice(action_distribution);
sol->append(actions[action]);
s.update(actions[action]);
// Then greedy build from partial solution *sol*, state *s*
build(data, *sol, s);
//*/
// -- Get mean and standard deviation from multiple `dives` --
// best_sol = samples(*current_node, *sol, s, std::move(best_sol));
std::unique_ptr<Solution> iter_best = samples(*current_node, *sol, s);
double backup_value = iter_best->lmax();
if(iter_best->lmax() < best_sol->lmax())
{
best_sol = std::move(iter_best);
}
////////////////////////////
///////// BACKUP /////////
////////////////////////////
if(DEBUG)
std::cerr << "-- Backup --" << std::endl;
double probe_value = sol->lmax();
std::cerr << "-- Backup --" << std::endl;
// double backup_value = sol->lmax();
// double backup_value = current_node->mean;
while(!current_node->is_root()){
// std::cout << "not root :" << *current_node << std::endl;
current_node->update(probe_value);
current_node->update(backup_value);
current_node = current_node->parent;
}
root.update(backup_value);
/////////
// END //
/////////
if(sol->lmax() < best_sol->lmax())
{
best_sol = std::move(sol);
}
// if(sol->lmax() < best_sol->lmax())
// {
// best_sol = std::move(sol);
// }
}
std::cerr << "End iter " << iter_count << ", best solution = " << best_sol->lmax() << std::endl;
std::cout << "iter " << iter_count << ", best solution = " << best_sol->lmax() << ", depth iter = " << sol->size() << std::endl;
}