Commit 935802d3 authored by Valentin Antuori's avatar Valentin Antuori
Browse files

first draft MCTS

parent f083c0a3
......@@ -2,18 +2,12 @@ cmake_minimum_required(VERSION 3.0)
project(main)
find_package(Torch REQUIRED)
link_libraries(stdc++fs)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-z,stack-size=10000000000")
# Crée des variables avec les fichiers à compiler
set(SRCS
src/main.cpp
src/policy-gradient.cpp
......@@ -41,14 +35,9 @@ set(HEADERS
add_executable(main ${SRCS} ${HEADERS})
target_link_libraries(main
#PRIVATE "-Wl, -z stack-size=10000000"
"${TORCH_LIBRARIES}"
)
target_link_libraries(main "${TORCH_LIBRARIES}")
#target_link_libraries(main PRIVATE "-Wl,--stack-size,10000000")
set_target_properties(main PROPERTIES
CXX_STANDARD 17
......
#include <string>
#include <cmath>
#include "heuristic.h"
int choice(std::vector<double> proba)
using namespace std;
unsigned int choice(vector<double> proba)
{
double r = (double)rand() / RAND_MAX;
int i;
unsigned int i;
for (i = 0; i < proba.size(); ++i) {
const double prob = proba[i];
if (r <= prob)
......@@ -14,33 +17,105 @@ int choice(std::vector<double> proba)
else
r -= prob;
}
throw std::string("Invalid probability vector");
for(double p : proba){
std::cerr << p << " ";
}
std::cerr << std::endl <<r << std::endl;
throw string("Invalid probability vector");
}
std::unique_ptr<Solution> build(const Instance& data)
/**
 * Build a complete solution from scratch: start from an empty solution and
 * a fresh state, then let the incremental overload fill in every task.
 */
unique_ptr<Solution> build(const Instance& data)
{
    auto solution = make_unique<Solution>(data);
    State state(data);
    build(data, *solution, state);
    // A local returned by value is moved out (or the copy is elided), so no
    // explicit std::move is needed here.
    return solution;
}
void build(const Instance& data, Solution& sol, State& s)
{
for(int i = sol.size(); i < data.nb_tasks; ++i)
{
// std::cerr << "step "<< i << std::endl;
vector<int> actions = s.get_actions();
// std::cerr << "Candidates : ";
// for(auto a : actions){
// std::cerr << a << " ";
// }
// std::cerr << std::endl;
// compute proba;
vector<double> proba = get_distribution(data, s, actions);
int idx = choice(proba);
sol.append(actions[idx]);
s.update(actions[idx]);
// std::cerr << s << std::endl;
// std::cerr << sol << std::endl;
// std::cerr << " --- " << std::endl;
}
// std::cerr << "End Build" << std::endl;
}
void build(const Instance& data, Solution& sol, State& state)
/**
 * Turn the fitness of every candidate action into a probability (softmax).
 *
 * @param data    instance the fitness is evaluated on
 * @param s       current state
 * @param actions candidate actions; may be empty
 * @return one probability per entry of `actions`, in the same order
 *         (an empty vector when `actions` is empty)
 */
vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions)
{
    vector<double> proba(actions.size());
    if(actions.empty())
    {
        return proba;
    }

    // First pass: raw fitness of each action, remembering the largest one.
    double max_fitness = 0;
    for(size_t i = 0; i < actions.size(); ++i)
    {
        proba[i] = fitness_func(data, s, actions[i]);
        if(i == 0 || proba[i] > max_fitness)
        {
            max_fitness = proba[i];
        }
    }

    // Shifting every fitness by the maximum leaves the softmax unchanged
    // mathematically, but keeps each exponential in (0, 1]: exp() cannot
    // overflow and the normalising sum is at least 1, so no division by 0.
    double sum_exp_fitness = 0;
    for(double& p : proba)
    {
        p = std::exp(p - max_fitness);
        sum_exp_fitness += p;
    }
    for(double& p : proba)
    {
        p /= sum_exp_fitness;
    }
    return proba;
}
std::vector<double> get_distribution(const State& s)
/**
 * Score an action as 1 minus a THETA-weighted sum of four normalised terms:
 * slack, travel/waiting time, trolley length and a pickup indicator.
 */
double fitness_func(const Instance& data, const State& s, int action)
{
    // Instance::distance returns 0 when either task id is negative.
    const auto travel = data.distance(s.last_action, action);
    const auto release = data.release_date(action);
    const auto clock = s.date - s.tardiness;

    const double slack_term = static_cast<double>(s.lst(action) - std::max(release, clock + travel)) / data.max_slack;
    const double wait_term = static_cast<double>(std::max(release - clock, travel)) / data.max_dist;
    const double length_term = 1 - static_cast<double>(data.trolley_length(action)) / data.T_max;
    double pickup_term = 0;
    if(data.is_pickup(action))
    {
        pickup_term = 1;
    }

    const double weighted = THETA_1 * slack_term + THETA_2 * wait_term + THETA_3 * length_term + THETA_4 * pickup_term;
    return 1 - weighted;
}
// std::vector<double> get_distrib(const State& s)
// vector<double> get_distrib(const State& s)
// {
// std::vector<double>
// vector<double>
// // int counter = 0;
// int size = tasks_list.size();
// torch::Tensor t = torch::empty({size, 4}, torch::kFloat);
// // std::cout << "candidat : " << std::endl;
// // cout << "candidat : " << endl;
// if(trace_flag){
// std::cout << "candidate : " << std::endl;
// cout << "candidate : " << endl;
// }
// for(uint i = 0; i < tasks_list.size(); ++i){
// int task = tasks_list[i];
......@@ -50,17 +125,17 @@ std::vector<double> get_distribution(const State& s)
// if(last_task > 0)
// dist = data.distance(last_task, task);
// int slack = lst[comp] - std::max(data.release_date(task), (time - tardiness) + dist);
// int slack = lst[comp] - max(data.release_date(task), (time - tardiness) + dist);
// int pick_1 = data.is_pickup(task) ? 1 : 0;
// t[i][0] = (float)slack / data.max_slack;
// t[i][1] = (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist;
// t[i][1] = (float)max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist;
// t[i][2] = 1 - (float)data.trolley_length(task) / data.T_max;
// t[i][3] = (float)pick_1;
// if(trace_flag){
// std::cout << tasks_list[i] << "(" << (float)slack / data.max_slack << " " << (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist
// << " " << 1-(float)data.trolley_length(task) / data.T_max << " " << pick_1 << ")" << std::endl;
// cout << tasks_list[i] << "(" << (float)slack / data.max_slack << " " << (float)max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist
// << " " << 1-(float)data.trolley_length(task) / data.T_max << " " << pick_1 << ")" << endl;
// }
// }
......
......@@ -6,18 +6,32 @@
#include "solution.h"
#include "instance.h"
#define THETA_1 0.251156433962621
#define THETA_2 0.576637984553322
#define THETA_3 0.148330024098084
#define THETA_4 0.023875557385973
/**
* Build solution from scratch
*/
std::unique_ptr<Solution> build(const Instance& data);
/**
* Build solution from partial solution, and state
*/
void build(const Instance& data, Solution& sol, State& state);
std::vector<double> get_distribution(const State& s);
/**
 * Return the probability distribution over the given actions
*/
std::vector<double> get_distribution(const Instance& data, const State& s, const std::vector<int>& actions);
/**
 * Return the index of the sampled action, in {0, 1, ..., |proba| - 1}
*/
int choice(std::vector<double> proba);
unsigned int choice(std::vector<double> proba);
/**
* Return fitness
*/
double fitness_func(const Instance& data, const State& s, int action);
......
......@@ -115,6 +115,9 @@ std::string Instance::task_to_string(const int task)const
}
/**
 * Travel distance between the machines of two tasks.
 * A negative task id on either side yields 0.
 */
int Instance::distance(const int task1, const int task2)const
{
    const bool both_valid = task1 >= 0 && task2 >= 0;
    return both_valid ? m_matrix[machine(task1)][machine(task2)] : 0;
}
int Instance::distance_max(const int task)const
......
......@@ -17,6 +17,7 @@
#include "options.h"
#include "policy-gradient.h"
#include "mcts.h"
#include "heuristic.h"
namespace fs = std::experimental::filesystem;
using namespace std;
......@@ -174,6 +175,9 @@ int main(int argc, char **argv){
// }
// }
// }
std::unique_ptr<Solution> sol = solve(data[0], 0.5);
}
std::cout << "Fin" << std::endl;
......
#include <list>
#include <limits>
#include <vector>
......@@ -12,21 +10,32 @@
#define DEBUG true
/**
Main function, run the algorithm on the instance in data
@param data : the instance to solve
*/
std::unique_ptr<Solution> solve(const Instance& data, double c)
{
if(DEBUG)
std::cerr << "--- Start MCTS ---" << std::endl;
// initialize best_sol with heuristics
std::unique_ptr<Solution> best_sol = build(data);
if(DEBUG)
std::cerr << "Initial solution : " << best_sol->lmax() << std::endl;
// create root node and all its children
Node root(0, 0, -1, nullptr);
Node root(0, 0, -1, 0, nullptr);
while(true)
int iter_count = 0;
while(best_sol->lmax() > 0)
{
iter_count++;
// create a solution
auto sol = std::make_unique<Solution>(data);
// Need a state representation (in order to compute next available action)
......@@ -35,6 +44,8 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
/////////////////////////////
///////// SELECTION /////////
/////////////////////////////
if(DEBUG)
std::cerr << "-- Selection --" << std::endl;
Node* current_node = &root;
while(!current_node->is_leaf())
{
......@@ -53,44 +64,69 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
sol->append(best->action);
current_node = best;
}
std::cerr << "Partial Sol : " << *sol << std::endl;
////////////////////////////
///////// EXPAND /////////
////////////////////////////
std::vector<double> action_distribution = get_distribution(s);
for(int i = 0; i < s.actions.size(); ++i){
current_node->add_child(/*reward=*/0, action_distribution[i], s.actions[i]);
////////////////////////////
if(DEBUG)
std::cerr << "-- Expand --" << std::endl;
// TODO : Don't expand if node implies tardiness
std::vector<int> actions = s.get_actions();
std::vector<double> action_distribution = get_distribution(data, s, actions);
bool delete_current_node = false;
for(size_t i = 0; i < actions.size(); ++i){
int end_date = current_node->end_date + data.distance(current_node->action, actions[i]) + data.duration(actions[i]);
if(end_date > data.due_date(actions[i])){
delete_current_node = true;
break;
}
current_node->add_child(/*reward=*/0, action_distribution[i], actions[i], end_date);
}
///////////////////////////////
////////// SIMULATION /////////
///////////////////////////////
//choose the next randomly here, as we already have the distribution
int action = choice(action_distribution);
sol->append(s.actions[action]);
s.update(s.actions[action]);
// use build fonction from solution and state after
build(data, *sol, s);
if(delete_current_node){
if(DEBUG)
std::cerr << "Delete Node" << std::endl;
delete current_node;
}else{
///////////////////////////////
////////// SIMULATION /////////
///////////////////////////////
if(DEBUG)
std::cerr << "-- Simulation --" << std::endl;
//choose the next randomly here, as we already get the distribution
int action = choice(action_distribution);
sol->append(actions[action]);
s.update(actions[action]);
build(data, *sol, s);
////////////////////////////
///////// BACKUP /////////
////////////////////////////
if(DEBUG)
std::cerr << "-- Backup --" << std::endl;
double probe_value = sol->lmax();
while(!current_node->is_root()){
// std::cout << "not root :" << *current_node << std::endl;
current_node->update(probe_value);
current_node = current_node->parent;
}
/////////
// END //
/////////
if(sol->lmax() < best_sol->lmax())
{
best_sol = std::move(sol);
}
}
////////////////////////////
///////// BACKUP /////////
////////////////////////////
double probe_value = sol->lmax();
while(!current_node->is_root()){
current_node->update(probe_value);
current_node = current_node->parent;
}
std::cerr << "End iter " << iter_count << ", best solution = " << best_sol->lmax() << std::endl;
/////////
// END //
/////////
if(sol->lmax() < best_sol->lmax())
{
best_sol = std::move(sol);
}
}
return best_sol;
return best_sol; //compiler replace by return std::move(best_sol)
}
......@@ -99,12 +135,12 @@ std::unique_ptr<Solution> solve(const Instance& data, double c)
/////////////////////////
int Node::count = 0;
Node::Node(double R, double P, int action, Node* parent)
:R(R), W(0), Q(0), P(P), N(0), action(action), parent(parent)
/**
 * Create a node with zeroed statistics (W, Q, N) and the next free id
 * taken from the static counter.
 */
Node::Node(double R, double P, int action, int end_date, Node* parent)
    : R(R),
      W(0),
      Q(0),
      P(P),
      N(0),
      action(action),
      end_date(end_date),
      parent(parent)
{
    id = count;
    ++count;
}
Node* Node::add_child(double reward, double prior_proba, int action) {
Node *n = new Node(reward, prior_proba, action, this);
/**
 * Allocate a new child of this node, register it in `children`, and
 * return a pointer to it.
 */
Node* Node::add_child(double reward, double prior_proba, int action, int end_date) {
    children.push_back(new Node(reward, prior_proba, action, end_date, this));
    return children.back();
}
......@@ -112,9 +148,21 @@ Node* Node::add_child(double reward, double prior_proba, int action) {
Node::~Node()
{
if(children.size() > 0)
if(children.size() > 0 && !is_root())
{
std::cout << "Destructor" << std::endl;
// remove the node for the parent's children list
size_t i;
for(i = 0; i < parent->children.size(); ++i)
{
if(parent->children[i] == this)
{
break;
}
}
// remove in O(1) with idx;
std::swap(parent->children[i], parent->children.back());
parent->children.pop_back();
}
std::list<Node*> to_delete(children.begin(), children.end());
......@@ -132,7 +180,7 @@ Node::~Node()
// clear the sub node vector here !!!
current->children.clear();
}
// Then delete all the collected nodes (I don't remember why reverse order is or was relevant ?)
// Then delete all the collected nodes (I don't remember why reverse order is/was relevant ?)
for (auto it {to_delete.rbegin() }; it != to_delete.rend(); ++it)
{
delete (*it);
......@@ -146,7 +194,7 @@ bool Node::is_leaf()
}
/**
 * Tell whether this node is the root of the tree.
 * @return true when the node has no parent
 */
bool Node::is_root()
{
    // The previous `parent != nullptr` test was inverted and made the
    // correct return below it unreachable.
    return parent == nullptr;
}
void Node::update(double v)
......
#ifndef MCTS_DEF
#define MCTS_DEF
#include <iostream>
#include <typeinfo>
#include <vector>
#include <memory>
......@@ -13,7 +11,7 @@
Main function, run the algorithm on the instance in data
@param data : the instance to solve
*/
std::unique_ptr<Solution> solve(const Instance& data);
std::unique_ptr<Solution> solve(const Instance& data, double c);
class Node
{
......@@ -28,6 +26,7 @@ class Node
int N; // Visit count
int action; // Action taken by the parent
int end_date; //end date of the task
std::vector<Node*> children;
Node *parent;
......@@ -35,7 +34,7 @@ class Node
int id; // Identificator
Node(double R, double P, int action, Node* parent);
Node(double R, double P, int action, int end_date, Node* parent);
~Node();
/**
......@@ -43,7 +42,7 @@ class Node
* @param Prior_proba : Prior probability
* @return : Pointer to the created node
**/
Node* add_child(double reward, double prior_proba, int action);
Node* add_child(double reward, double prior_proba, int action, int end_date);
void update(double v);
......
......@@ -6,12 +6,9 @@
#include "instance.h"
#define NEW_PERIOD -1
#define ENDED -2
/**
 * Create an empty solution for `inst`: every per-position array is sized to
 * inst.nb_tasks, positions/sequence are filled with -1 and the rest with 0.
 */
Solution::Solution(const Instance& inst): data(inst),sequence(inst.nb_tasks, -1),m_positions(inst.nb_tasks, -1),
    m_starts(inst.nb_tasks, 0),m_max_tardiness(inst.nb_tasks, 0),m_trains(inst.nb_tasks, 0),m_size(0)
{
    // m_trains is already zero-filled above, so no explicit m_trains[0] = 0
    // is needed (and that write would be out of bounds when nb_tasks == 0).
}
......@@ -40,25 +37,25 @@ int Solution::end_of_task(const int task)const
{
return end(position(task));
}
int Solution::lateness(const int pos)const
/**
 * Tardiness of the task at position `pos`: how far its end date exceeds
 * its due date, never negative.
 */
int Solution::tardiness(const int pos)const
{
    const int overshoot = end(pos) - data.due_date(sequence[pos]);
    return overshoot > 0 ? overshoot : 0;
}
/**
 * Maximum tardiness recorded at the last filled position of the sequence.
 */
int Solution::lmax() const
{
    // The stale call to the renamed-away max_lateness() is dropped.
    return max_tardiness(size()-1);
}
int Solution::max_lateness(const int pos)const
/**
 * Maximum tardiness stored for position `pos`.
 */
int Solution::max_tardiness(const int pos)const
{
    // The stale read of the renamed-away m_max_lateness is dropped.
    return m_max_tardiness[pos];
}
int Solution::marg_incr_max_lateness(const int pos, const int apos)const
/**
 * Increase of the maximum tardiness between position `pos` and position
 * `apos`. A negative `pos` means "before the first task", in which case the
 * value at `apos` is returned as is.
 */
int Solution::marg_incr_max_tardiness(const int pos, const int apos)const
{
    // Stale calls to the renamed-away max_lateness() are dropped.
    if(pos < 0)
    {
        return max_tardiness(apos);
    }
    return max_tardiness(apos) - max_tardiness(pos);
}
int Solution::train(const int pos)const
{
......@@ -84,13 +81,13 @@ void Solution::append(const int task)
{
// std::cout << "Add start date as the release date : " << data.release_date(task) <<std::endl;
m_starts[size()] = data.release_date(task);
m_max_lateness[size()] = lateness(0);
m_max_tardiness[size()] = tardiness(0);
}
else
{
// std::cout << "start is " < <<std::endl;
m_starts[size()] = std::max(end(size()-1) + data.distance(sequence[size()-1], task), data.release_date(task));
m_max_lateness[size()] = std::max(lateness(size()), max_lateness(size()-1));
m_max_tardiness[size()] = std::max(tardiness(size()), max_tardiness(size()-1));
}
if(size() < data.nb_tasks-1)
......@@ -103,7 +100,7 @@ void Solution::append(const int task)
// }
// std::cout << std::endl;
// std::cout<<" Lateness : ";
// for(auto t : m_max_lateness){
// for(auto t : m_max_tardiness){
// std::cout << t << " ";
// }
// std::cout << std::endl;
......@@ -165,8 +162,8 @@ int Solution::is_relax_valid() const
// std::cout << data.task_to_string(task) << " | " << i << " " << time_acc << " " << train_acc << std::endl;
int lateness = std::max(0, time_acc - data.due_date(sequence[i]));
Lmax = std::max(Lmax, lateness);
int tardiness = std::max(0, time_acc - data.due_date(sequence[i]));
Lmax = std::max(Lmax, tardiness);
if(train_acc > data.T_max)
{
......@@ -204,7 +201,8 @@ int Solution::is_relax_valid() const
std::ostream& operator << (std::ostream& out, const Solution& sol)
{
out << "Date : " << sol.end(sol.size()-1) << " / train : " << sol.train(sol.size()) << std::endl;;
out << "Size = " << sol.size() << std::endl;
out << "Sequence : ";
for(int i = 0; i < sol.size(); ++i)
{
out << sol.sequence[i] << " ";
......@@ -214,9 +212,9 @@ std::ostream& operator << (std::ostream& out, const Solution& sol)
State::State(const Instance& inst): data(inst), sol(data), last_task(-1), train_size(0), time(0), tardiness(0),
nexts(data.nb_components, NEW_PERIOD), curr_period(data.nb_components, 0), lst(data.nb_tasks, 0),
go_back(data.nb_components, false), is_valid(true)
State::State(const Instance& inst): data(inst), date(0), tardiness(0), last_action(NO_ACTION), train_size(0),
nexts(data.nb_components, NEW_PERIOD), curr_period(data.nb_components, 0), _lst(data.nb_tasks, 0),
go_back(data.nb_components, false)
{
// compute lst for each components pickup
for(int comp = 0; comp < data.nb_components; ++comp)
......@@ -225,36 +223,35 @@ State::State(const Instance& inst): data(inst), sol(data), last_task(-1), train_
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
_lst[comp] = data.due_date(pick_idx+3) - travels - durations;
}
}
/** Update internal state **/
void State::update(int task)
{
int comp = data.component(task);
int reward = 0;
int dist = 0;