Commit 886c7bc4 authored by Valentin Antuori's avatar Valentin Antuori
Browse files

update mcts

parent e16e02fb
File added
#include <string>
#include <cmath>
#include <limits>
#include "heuristic.h"
#define DEBUG_H false
#define DEBUG_H_MACCRO false
using namespace std;
bool DEBUG_H = DEBUG_H_MACCRO;
double default_temperature = 0.1;
void set_default_temperature(double temperature)
{
......@@ -26,18 +29,22 @@ unsigned int choice(vector<double> proba)
r -= prob;
}
for(double p : proba){
std::cerr << p << " ";
}
std::cerr << std::endl <<r << std::endl;
// proba should sum to one, but....
return rand() % proba.size();
throw string("Invalid probability vector");
// std::cerr << "Error Proba : " << std::endl;
// for(double p : proba){
// std::cerr << p << " ";
// }
// std::cerr << std::endl <<r << std::endl;
// throw string("Invalid probability vector");
}
std::unique_ptr<Solution> random_sample(const Instance& data)
{
unique_ptr<Solution> sol = make_unique<Solution>(data);
State s(data);
unique_ptr<Solution> sol = make_unique<Solution>(&data);
State s(sol->data);
for(int i = sol->size(); i < data.nb_tasks; ++i)
{
// std::cerr << "step "<< i << std::endl;
......@@ -66,18 +73,15 @@ std::unique_ptr<Solution> random_sample(const Instance& data)
return sol;
}
unique_ptr<Solution> build(const Instance& data)
void build(Solution& sol)
{
unique_ptr<Solution> sol = make_unique<Solution>(data);
State s(data);
build(data, *sol, s);
return sol;//compiler should replace this statement by "return std::move(sol);"
State s(sol.data);
build(sol, s);
}
void build(const Instance& data, Solution& sol, State& s)
void build(Solution& sol, State& s)
{
for(int i = sol.size(); i < data.nb_tasks; ++i)
for(int i = sol.size(); i < sol.data->nb_tasks; ++i)
{
if(DEBUG_H)
......@@ -90,7 +94,7 @@ void build(const Instance& data, Solution& sol, State& s)
// compute proba;
vector<double> proba = get_distribution(data, s, actions);
vector<double> proba = get_distribution(*sol.data, s, actions);
if(DEBUG_H)
for(size_t i = 0; i < proba.size(); ++i){
std::cerr << actions[i] << " : " << proba[i] << std::endl;
......@@ -125,10 +129,12 @@ vector<double> get_distribution(const Instance& data, const State& s, const std:
// std::cerr << "Size : " << actions.size() << std::endl;
double sum_exp_fitness = 0;
// std::cerr << "exp_finess" << std::endl;
vector<double> fitnesses(actions.size());
vector<double> exp_fitness(actions.size());
// std::cerr << "vector1" << std::endl;
vector<double> proba(actions.size());
// std::cerr << "vector2" << std::endl;
double max_fit = -std::numeric_limits<float>::infinity();
if(DEBUG_H){
std::cerr << "last action :";
......@@ -138,14 +144,17 @@ vector<double> get_distribution(const Instance& data, const State& s, const std:
for(size_t i = 0; i < actions.size(); ++i)
{
//compute fitness for action i
if(DEBUG_H)
std::cerr << actions[i] << " : " ;
double fitness = fitness_func(data, s, actions[i]);
if(DEBUG_H)
std::cerr << "Fitness="<<fitness << std::endl;
exp_fitness[i] = exp(fitness / temperature);
if(DEBUG_H){
std::cerr << actions[i] << " : " ;
std::cerr << "Fitness="<<fitness << std::endl;
}
max_fit = std::max(fitness, max_fit);
fitnesses[i] = fitness;
}
for(size_t i = 0; i < actions.size(); ++i)
{
exp_fitness[i] = exp((fitnesses[i] - max_fit) / temperature);
sum_exp_fitness += exp_fitness[i];
}
......@@ -155,7 +164,7 @@ vector<double> get_distribution(const Instance& data, const State& s, const std:
proba[i] = exp_fitness[i] / sum_exp_fitness;
}
return proba;
return proba; // compiler should optimize this
}
double fitness_func(const Instance& data, const State& s, int action)
......
......@@ -12,16 +12,17 @@
#define THETA_4 0.023875557385973
extern double default_temperature;
extern bool DEBUG_H;
void set_default_temperature(double temperature);
/**
* Build solution from scratch
*/
std::unique_ptr<Solution> build(const Instance& data);
void build(Solution& sol);
/**
* Build solution from partial solution, and state
*/
void build(const Instance& data, Solution& sol, State& state);
void build(Solution& sol, State& state);
/**
* Perform a random sample
......
This diff is collapsed.
......@@ -10,6 +10,8 @@
#define BACKUP_MEAN 0
#define BACKUP_BEST 1
extern bool DEBUG;
class Node;
/**
Main function, run the algorithm on the instance in data
......@@ -20,10 +22,10 @@ Solution solve(const Instance& data, double c, bool print_stat, int timeout, int
* Samples from the partial solution *sol*
* compute the mean value and the std deviation of the current_node
* Update the best solution if it finds better
* Return the best sample's objective value
*/
std::unique_ptr<Solution> samples(Node& current_node, const Solution& sol, const State& s, std::unique_ptr<Solution> best_sol);
int samples(Node& current_node, const Solution& sol, const State& s, Solution& best_sol);
std::unique_ptr<Solution> samples(Node& current_node, const Solution& sol, const State& s);
class Node
{
......@@ -32,11 +34,16 @@ class Node
/**/
double R; // Immediate reward on the edge from parent to this (0 for lmax)
double W; // Sum of Q_val towards runs
double avg_W; // Q-value estimation
double W; // Sum of Rewards
//double avg_W; // Q-value estimation
double P; // Prior probability
int N; // Visit count
double dyna_std_dev;
double dyna_variance_N; // = variance * N
double dyna_mean;
// Use in static way
double std_dev;
double mean;
......
......@@ -2,13 +2,12 @@
#include <iostream>
#include <iterator>
#include "instance.h"
Solution::Solution(const Instance& inst): data(inst),sequence(inst.nb_tasks, -1),m_positions(inst.nb_tasks, -1),
m_starts(inst.nb_tasks, 0),m_max_tardiness(inst.nb_tasks, 0),m_trains(inst.nb_tasks, 0),m_size(0)
Solution::Solution(const Instance* inst): data(inst),sequence(data->nb_tasks, -1),m_positions(data->nb_tasks, -1),
m_starts(data->nb_tasks, 0),m_max_tardiness(data->nb_tasks, 0),m_trains(data->nb_tasks, 0),m_size(0)
{
m_trains[0] = 0;
}
......@@ -30,7 +29,7 @@ int Solution::start(const int pos)const
}
int Solution::end(const int pos)const
{
return m_starts[pos] + data.duration(sequence[pos]);
return m_starts[pos] + data->duration(sequence[pos]);
}
int Solution::start_of_task(const int task)const
{
......@@ -42,7 +41,7 @@ int Solution::end_of_task(const int task)const
}
int Solution::tardiness(const int pos)const
{
return std::max(0, end(pos) - data.due_date(sequence[pos]));
return std::max(0, end(pos) - data->due_date(sequence[pos]));
}
int Solution::lmax() const
{
......@@ -82,20 +81,20 @@ void Solution::append(const int task)
if(size() == 0)
{
// std::cout << "Add start date as the release date : " << data.release_date(task) <<std::endl;
m_starts[size()] = data.release_date(task);
// std::cout << "Add start date as the release date : " << data->release_date(task) <<std::endl;
m_starts[size()] = data->release_date(task);
m_max_tardiness[size()] = tardiness(0);
}
else
{
// std::cout << "start is " < <<std::endl;
m_starts[size()] = std::max(end(size()-1) + data.distance(sequence[size()-1], task), data.release_date(task));
m_starts[size()] = std::max(end(size()-1) + data->distance(sequence[size()-1], task), data->release_date(task));
m_max_tardiness[size()] = std::max(tardiness(size()), max_tardiness(size()-1));
}
if(size() < data.nb_tasks-1)
if(size() < data->nb_tasks-1)
{
m_trains[size()+1] = train(size()) + data.train_use(task);
m_trains[size()+1] = train(size()) + data->train_use(task);
}
// std::cout<<" Start : ";
// for(auto t : m_starts){
......@@ -131,60 +130,60 @@ int Solution::is_relax_valid() const
int train_acc = 0;
int Lmax = 0;
if(size() != data.nb_tasks)
if(size() != data->nb_tasks)
{
std::cerr << "Solution is invalid : incorect size " << size() << ", should be " << data.nb_tasks << std::endl;
std::cerr << "Solution is invalid : incorect size " << size() << ", should be " << data->nb_tasks << std::endl;
return -1;
}
// vector of bool only to check precedences
std::vector<bool> is_done(data.nb_tasks, false);
std::vector<bool> is_done(data->nb_tasks, false);
//vector of int to check period idxs are increasing
std::vector<int> last_period(data.nb_components, 0);
std::vector<int> last_period(data->nb_components, 0);
time_acc += std::max(data.duration(sequence[0]), data.release_date(sequence[0]));
train_acc += data.train_use(sequence[0]);
time_acc += std::max(data->duration(sequence[0]), data->release_date(sequence[0]));
train_acc += data->train_use(sequence[0]);
is_done[sequence[0]] = true;
if(data.is_delivery(sequence[0]) || train_acc > data.T_max || data.period(sequence[0]) > 0){
if(data->is_delivery(sequence[0]) || train_acc > data->T_max || data->period(sequence[0]) > 0){
return -1;
}
if(time_acc > data.due_date(sequence[0])){
Lmax = time_acc - data.due_date(sequence[0]);
if(time_acc > data->due_date(sequence[0])){
Lmax = time_acc - data->due_date(sequence[0]);
}
// std::cout << data.task_to_string(sol.sequence[0]) << " | " << 0 << " " << time_acc << " " << train_acc << std::endl;
// std::cout << data->task_to_string(sol.sequence[0]) << " | " << 0 << " " << time_acc << " " << train_acc << std::endl;
for(int i = 1; i < data.nb_tasks; ++i)
for(int i = 1; i < data->nb_tasks; ++i)
{
int task = sequence[i];
time_acc = std::max(time_acc + data.distance(sequence[i-1], sequence[i]), data.release_date(sequence[i])) + data.duration(sequence[i]);
train_acc += data.train_use(sequence[i]);
time_acc = std::max(time_acc + data->distance(sequence[i-1], sequence[i]), data->release_date(sequence[i])) + data->duration(sequence[i]);
train_acc += data->train_use(sequence[i]);
// std::cout << data.task_to_string(task) << " | " << i << " " << time_acc << " " << train_acc << std::endl;
// std::cout << data->task_to_string(task) << " | " << i << " " << time_acc << " " << train_acc << std::endl;
int tardiness = std::max(0, time_acc - data.due_date(sequence[i]));
int tardiness = std::max(0, time_acc - data->due_date(sequence[i]));
Lmax = std::max(Lmax, tardiness);
if(train_acc > data.T_max)
if(train_acc > data->T_max)
{
std::cerr << "Solution is invalid : Pos " << i << " train = " << train_acc << std::endl;
return -1;
}
if(data.period(task) < last_period[data.component(task)])
if(data->period(task) < last_period[data->component(task)])
{
for(auto p : last_period){
std::cout << p << " ";
}
std::cout << std::endl << last_period[data.component(task)] << std::endl;
std::cerr << "Solution is invalid : Pos " << i << " periods are not in increasing order for components " << data.component(task) << std::endl;
std::cout << std::endl << last_period[data->component(task)] << std::endl;
std::cerr << "Solution is invalid : Pos " << i << " periods are not in increasing order for components " << data->component(task) << std::endl;
return -1;
}
if(data.is_delivery(task) && !is_done[data.get_pickup(task)])
if(data->is_delivery(task) && !is_done[data->get_pickup(task)])
{
std::cerr << "Solution is invalid : Pos " << i << " delivery before pickup" << std::endl;
return -1;
......@@ -197,11 +196,33 @@ int Solution::is_relax_valid() const
}
is_done[task] = true;
last_period[data.component(task)] = data.period(task);
last_period[data->component(task)] = data->period(task);
}
return Lmax;
}
/**
 * Print one line per scheduled position on std::cout.
 *
 * Each line shows, in order: the task id, the stored start_of_task(task),
 * a start date recomputed here (in parentheses), the [release, due] window,
 * the stored train(position) and a train usage recomputed here (in parentheses).
 * NOTE(review): presumably a debugging aid to compare the cached values with
 * values recomputed from the instance data - confirm with the author.
 */
void Solution::complete_print() const
{
    int clock = 0; // start date rebuilt from distances, release dates and durations
    int load = 0;  // train usage accumulated over the tasks before this position

    for(int pos = 0; pos < size(); ++pos)
    {
        const int task = sequence[pos];

        // No travel before the first task; afterwards travel from the previous one,
        // then wait for the release date if it is later.
        const int travel = (pos > 0) ? data->distance(sequence[pos-1], task) : 0;
        clock = std::max(clock + travel, data->release_date(task));

        std::cout << task << " : " << start_of_task(task) << "=(" << clock << ") ["
                  << data->release_date(task) << ", " << data->due_date(task) << "] - "
                  << train(pos) << "=(" << load << ")"
                  << std::endl;

        // Advance both accumulators past the task that was just printed.
        clock += data->duration(task);
        load += data->train_use(task);
    }
}
std::ostream& operator << (std::ostream& out, const Solution& sol)
{
out << "Size = " << sol.size() << std::endl;
......@@ -215,18 +236,18 @@ std::ostream& operator << (std::ostream& out, const Solution& sol)
State::State(const Instance& inst): data(inst), date(0), tardiness(0), last_action(NO_ACTION), train_size(0),
nexts(data.nb_components, NEW_PERIOD), curr_period(data.nb_components, 0), _lst(data.nb_tasks, 0),
go_back(data.nb_components, false)
State::State(const Instance* inst): data(inst), date(0), tardiness(0), last_action(NO_ACTION), train_size(0),
nexts(data->nb_components, NEW_PERIOD), curr_period(data->nb_components, 0), _lst(data->nb_tasks, 0),
go_back(data->nb_components, false)
{
// compute lst for each components pickup
for(int comp = 0; comp < data.nb_components; ++comp)
for(int comp = 0; comp < data->nb_components; ++comp)
{
int pick_idx = data.get_pickfull_idx(comp, 0);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
int pick_idx = data->get_pickfull_idx(comp, 0);
int durations = data->duration(pick_idx) + data->duration(pick_idx+1) + data->duration(pick_idx+2) + data->duration(pick_idx+3);
int travels = data->distance(pick_idx, pick_idx+1) + data->distance(pick_idx+1, pick_idx+2) + data->distance(pick_idx+2, pick_idx+3);
_lst[comp] = data.due_date(pick_idx+3) - travels - durations;
_lst[comp] = data->due_date(pick_idx+3) - travels - durations;
}
}
......@@ -238,30 +259,30 @@ State::State(const State& o) : data(o.data), date(o.date), tardiness(o.tardiness
void State::update(int task)
{
int comp = data.component(task);
int comp = data->component(task);
int dist = 0;
if(last_action != NO_ACTION){
dist = data.distance(last_action, task);
dist = data->distance(last_action, task);
}
train_size += data.train_use(task);
date = std::max(date + dist, data.release_date(task)) + data.duration(task);
train_size += data->train_use(task);
date = std::max(date + dist, data->release_date(task)) + data->duration(task);
// compute the tardiness
int tmp_tard = std::max(0, date - data.due_date(task));
int tmp_tard = std::max(0, date - data->due_date(task));
if(tmp_tard > tardiness){
tardiness = tmp_tard;
}
if(/*nexts[comp] == NEW_PERIOD || */data.is_pickup(task)){
if(/*nexts[comp] == NEW_PERIOD || */data->is_pickup(task)){
nexts[comp] = task+1;
}else{
// Delivery
if(go_back[comp]){ // means second delivery
// go to the next periods if exist
curr_period[comp]++;
if(curr_period[comp] >= data.get_nb_periods(comp)){
if(curr_period[comp] >= data->get_nb_periods(comp)){
// No new period for this component
nexts[comp] = ENDED;
}else{
......@@ -272,24 +293,24 @@ void State::update(int task)
}else{
go_back[comp] = true;
// next task is a the other pickup
if(data.is_empty_operation(task)){
nexts[comp] = data.get_pickfull_idx(comp, curr_period[comp]);
if(data->is_empty_operation(task)){
nexts[comp] = data->get_pickfull_idx(comp, curr_period[comp]);
}else{
nexts[comp] = data.get_pickempty_idx(comp, curr_period[comp]);
nexts[comp] = data->get_pickempty_idx(comp, curr_period[comp]);
}
}
}
// then compute the lst
if(nexts[comp] == NEW_PERIOD){
int pick_idx = data.get_pickfull_idx(comp, curr_period[comp]);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
int pick_idx = data->get_pickfull_idx(comp, curr_period[comp]);
int durations = data->duration(pick_idx) + data->duration(pick_idx+1) + data->duration(pick_idx+2) + data->duration(pick_idx+3);
int travels = data->distance(pick_idx, pick_idx+1) + data->distance(pick_idx+1, pick_idx+2) + data->distance(pick_idx+2, pick_idx+3);
_lst[comp] = data.due_date(pick_idx+3) - travels - durations;
_lst[comp] = data->due_date(pick_idx+3) - travels - durations;
}else if(nexts[comp] != ENDED){
_lst[comp] += data.duration(task);
_lst[comp] += data.distance(task, nexts[comp]);
_lst[comp] += data->duration(task);
_lst[comp] += data->distance(task, nexts[comp]);
}
last_action = task;
......@@ -298,24 +319,24 @@ void State::update(int task)
}
int State::lst(int task) const
{
return _lst[data.component(task)];
return _lst[data->component(task)];
}
std::vector<int> State::get_actions() const
{
std::vector<int> actions;
for(int comp = 0; comp < data.nb_components; ++comp)
for(int comp = 0; comp < data->nb_components; ++comp)
{
if(nexts[comp] == NEW_PERIOD)
{
int pick = data.get_pickfull_idx(comp, curr_period[comp]);
if(train_size + data.train_use(pick) <= data.T_max){
int pick = data->get_pickfull_idx(comp, curr_period[comp]);
if(train_size + data->train_use(pick) <= data->T_max){
actions.push_back(pick);
actions.push_back(data.get_pickempty_idx(comp, curr_period[comp]));
actions.push_back(data->get_pickempty_idx(comp, curr_period[comp]));
}
}else if(nexts[comp] != ENDED)
{
if(train_size + data.train_use(nexts[comp]) <= data.T_max){
if(train_size + data->train_use(nexts[comp]) <= data->T_max){
actions.push_back(nexts[comp]);
}
}
......
......@@ -14,11 +14,12 @@
class Solution
{
public:
const Instance& data;
const Instance* data;
/** sequence of tasks (=\sigma)**/
std::vector<int> sequence;
Solution(const Instance& data);
Solution(const Instance* data);
Solution(const Solution& sol);
......@@ -50,10 +51,13 @@ class Solution
bool is_valid() const;
/** recompute solution and return lmax, or -1 if invalid : O(n) **/
int is_relax_valid() const;
void complete_print() const;
friend std::ostream& operator << (std::ostream& out, const Solution& sol);
/** position of each tasks (=\rho = \sigma^{-1})**/
std::vector<int> m_positions;
/** start dates at each position **/
......@@ -71,7 +75,7 @@ class Solution
class State
{
public:
const Instance& data;
const Instance* data;
int date;
int tardiness;
......@@ -86,7 +90,7 @@ class State
// True if at least two of the four operation are made for a component
std::vector<bool> go_back;
State(const Instance& data);
State(const Instance* data);
State(const State& o);
void update(int action);
int lst(int task) const;
......
#include <vector>
#include <iostream>
using namespace std;
/**
 * Replace the content of *v* with the fixed sequence {10, 20, 30, 40, 50}.
 * The new values are built in a temporary vector which is move-assigned
 * into *v*; whatever *v* held before is discarded.
 */
void test_move(std::vector<int>& v)
{
    // The right-hand side is an rvalue, so this is a move assignment.
    v = std::vector<int>{10, 20, 30, 40, 50};
}
/**
 * Small demo: print a vector, let test_move() overwrite it, print it again.
 * The command-line arguments are not used.
 */
int main(int argc, char **argv){
    std::vector<int> values{0, 1, 2, 3, 4, 5};

    // Write every element followed by a single space, then end the line (with flush).
    const auto dump = [](const std::vector<int>& seq)
    {
        for(auto it = seq.begin(); it != seq.end(); ++it)
            std::cout << *it << " ";
        std::cout << std::endl;
    };

    dump(values);       // content before the call
    test_move(values);
    dump(values);       // content after the call
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment