Commit f083c0a3 authored by Valentin Antuori

add mcts algorithm structure

parent a6a01f53
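For orientation, the diff below adds the usual four MCTS phases (selection, expansion, simulation, backpropagation) inline in solve() in src/mcts.cpp, with src/heuristic.cpp supplying the prior distribution and the rollout. The condensed sketch that follows only mirrors that structure as a reading aid: select_child_puct stands for the inline PUCT argmax written out in the diff (it is not a function this commit defines), and, as in the diff, the loop has no stopping criterion yet.

Node* select_child_puct(Node* node, double c); // shorthand for the inline argmax over PUCT scores

std::unique_ptr<Solution> solve_outline(const Instance& data, double c)
{
    std::unique_ptr<Solution> best_sol = build(data);      // heuristic incumbent
    Node root(0, 0, -1, nullptr);
    while(true)
    {
        auto sol = std::make_unique<Solution>(data);
        State s(data);
        Node* node = &root;
        while(!node->is_leaf())                             // 1. SELECTION: follow PUCT down to a leaf
        {
            node = select_child_puct(node, c);              //    argmax of Q + c*P*sqrt(N_parent)/(1+N_child)
            s.update(node->action);
            sol->append(node->action);
        }
        std::vector<double> prior = get_distribution(s);    // 2. EXPANSION: one child per feasible action
        for(size_t i = 0; i < s.actions.size(); ++i)
            node->add_child(0, prior[i], s.actions[i]);
        int a = choice(prior);                              // 3. SIMULATION: one sampled step, then a
        sol->append(s.actions[a]);                          //    heuristic completion via build()
        s.update(s.actions[a]);
        build(data, *sol, s);
        double v = sol->lmax();                             // 4. BACKUP: propagate the solution value
        for(Node* p = node; !p->is_root(); p = p->parent)
            p->update(v);
        if(sol->lmax() < best_sol->lmax())
            best_sol = std::move(sol);
    }
    return best_sol;
}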
......@@ -21,6 +21,7 @@ set(SRCS
src/solution.cpp
src/options.cpp
src/mcts.cpp
src/heuristic.cpp
)
......@@ -34,6 +35,7 @@ set(HEADERS
src/solution.h
src/options.h
src/mcts.h
src/heuristic.h
${TCLAP}
)
......
#include <string>
#include "heuristic.h"
int choice(std::vector<double> proba)
{
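// Roulette-wheel sampling: r is uniform in [0, 1]; walk the distribution,
// subtracting each action's probability mass until r falls inside one of them.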
double r = (double)rand() / RAND_MAX;
int i;
for (i = 0; i < proba.size(); ++i) {
const double prob = proba[i];
if (r <= prob)
return i;
else
r -= prob;
}
throw std::string("Invalid probability vector");
}
std::unique_ptr<Solution> build(const Instance& data)
{
}
void build(const Instance& data, Solution& sol, State& state)
{
}
std::vector<double> get_distribution(const State& s)
{
// int counter = 0;
int size = tasks_list.size();
torch::Tensor t = torch::empty({size, 4}, torch::kFloat);
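// One row of 4 normalized features per candidate task: slack w.r.t. the
// component's latest start time, travel/waiting time from the last task,
// remaining trolley capacity, and a pickup flag.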
// std::cout << "candidat : " << std::endl;
if(trace_flag){
std::cout << "candidate : " << std::endl;
}
for(uint i = 0; i < tasks_list.size(); ++i){
int task = tasks_list[i];
int comp = data.component(task);
int dist = 0;
if(last_task > 0)
dist = data.distance(last_task, task);
int slack = lst[comp] - std::max(data.release_date(task), (time - tardiness) + dist);
int pick_1 = data.is_pickup(task) ? 1 : 0;
t[i][0] = (float)slack / data.max_slack;
t[i][1] = (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist;
t[i][2] = 1 - (float)data.trolley_length(task) / data.T_max;
t[i][3] = (float)pick_1;
if(trace_flag){
std::cout << tasks_list[i] << "(" << (float)slack / data.max_slack << " " << (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist
<< " " << 1-(float)data.trolley_length(task) / data.T_max << " " << pick_1 << ")" << std::endl;
}
}
return t;
}
\ No newline at end of file
#ifndef HEURISTIC_DEF
#define HEURISTIC_DEF
#include <memory>
#include <vector>
#include "solution.h"
#include "instance.h"
class State; // forward declaration (State is defined below)
std::unique_ptr<Solution> build(const Instance& data);
void build(const Instance& data, Solution& sol, State& state);
std::vector<double> get_distribution(const State& s);
/**
* return action number in {0, 1, ..., |proba|-1}
*/
int choice(std::vector<double> proba);
class State
{
public:
const Instance data;
Solution sol;
int last_task;
int train_size;
int time;
int tardiness;
std::vector<int> nexts;
std::vector<int> curr_period;
std::vector<int> lst;
std::vector<bool> go_back; // internal state for tracking periods
bool is_valid;
State(const Instance& inst): data(inst), sol(data), last_task(-1), train_size(0), time(0), tardiness(0),
nexts(data.nb_components, NEW_PERIOD), curr_period(data.nb_components, 0), lst(data.nb_tasks, 0),
go_back(data.nb_components, false), is_valid(true)
{
// compute the latest start time (lst) of each component's first pickup:
// due date of the last task of the pickup chain, minus the chained durations and travel times
for(int comp = 0; comp < data.nb_components; ++comp)
{
int pick_idx = data.get_pickfull_idx(comp, 0);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
}
}
/** Update internal state **/
int update(int task)
{
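// Append `task` to the partial schedule: advance the time and train load,
// move the component to its next expected task/period, and return the
// (negative) marginal increase in tardiness as the immediate reward.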
int comp = data.component(task);
int reward = 0;
int dist = 0;
if(last_task > 0){
dist = data.distance(last_task, task);
}
train_size += data.train_use(task);
time = std::max(time + dist + data.duration(task), data.release_date(task));
// compute the "look ahead" tardiness
int tmp_tard = std::max(0, time - (lst[comp] + data.duration(task)));
if(tmp_tard > tardiness){
// the reward is minus the marginal increase in tardiness
reward = -(tmp_tard - tardiness);
tardiness = tmp_tard;
}
if(nexts[comp] == NEW_PERIOD || data.is_pickup(task)){
nexts[comp] = task+1;
}else{
// Delivery chosen
if(go_back[comp]){
// go to the next period, if any
curr_period[comp]++;
if(curr_period[comp] >= data.get_nb_periods(comp)){
// No new period for this component
nexts[comp] = ENDED;
}else{
nexts[comp] = NEW_PERIOD;
}
go_back[comp] = false;
}else{
go_back[comp] = true;
// the next task is the other pickup
if(data.is_empty_operation(task)){
nexts[comp] = data.get_pickfull_idx(comp, curr_period[comp]);
}else{
nexts[comp] = data.get_pickempty_idx(comp, curr_period[comp]);
}
}
}
// then compute the lst
if(nexts[comp] == NEW_PERIOD){
int pick_idx = data.get_pickfull_idx(comp, curr_period[comp]);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
}else if(nexts[comp] != ENDED){
lst[comp] += data.duration(task);
lst[comp] += data.distance(task, nexts[comp]);
}
last_task = task;
return reward;
}
};
......
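A note on the two build() overloads declared above: the second one, build(const Instance&, Solution&, State&), is left empty in this commit but is called from src/mcts.cpp to finish a partial schedule. Read together with get_distribution() and choice(), one plausible completion is sketched below; it is only an illustration, and it assumes a state.actions container of the currently feasible tasks, which src/mcts.cpp already relies on but State does not declare yet.

// Hypothetical completion of the empty build(data, sol, state) stub -- not part of the commit.
void build(const Instance& data, Solution& sol, State& state)
{
    (void)data; // unused in this simple variant; kept to match the declared signature
    // keep extending the schedule while some task is still feasible
    while(!state.actions.empty())
    {
        std::vector<double> distribution = get_distribution(state); // heuristic scores of the candidates
        int idx = choice(distribution);                              // sample an index from that distribution
        int task = state.actions[idx];
        sol.append(task);                                            // commit the task to the solution...
        state.update(task);                                          // ...and keep the state consistent with it
    }
}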
......@@ -7,8 +7,10 @@
#include <experimental/filesystem>
#include <typeinfo>
#include <list>
#include <torch/torch.h>
#include <stdlib.h>
#include "instance.h"
#include "solution.h"
......@@ -19,6 +21,7 @@
namespace fs = std::experimental::filesystem;
using namespace std;
// int Node::count = 0;
int main(int argc, char **argv){
ChariotOptions opt = parse(argc, argv);
......@@ -131,57 +134,46 @@ int main(int argc, char **argv){
}
else if(opt.mcts)
{
srand (opt.seed);
// {
// Node root = Node(0, 0);
// list<Node*> node_list{&root};
// int count = 0;
// while(!node_list.empty()){
// Node* n = node_list.front();
// node_list.pop_front();
// int nb_child = rand() % 15 + 1;
// if(count < opt.seed){
// for(int i = 0; i < nb_child; ++i){
// Node* tmp = n->add_child(0, 0);
// node_list.push_back(tmp);
// count++;
// }
// }
// }
// }
// root.remove()
}
std::cout << "Fin" << std::endl;
......
#include <list>
#include <limits>
#include <vector>
#include <stdlib.h>
#include <math.h>
#include "mcts.h"
#include "solution.h"
#include "heuristic.h"
......@@ -10,64 +16,81 @@
Main function, run the algorithm on the instance in data
@param data : the instance to solve
@param c : exploration constant in the PUCT selection rule
*/
std::unique_ptr<Solution> solve(const Instance& data, double c)
{
// initialize best_sol with heuristics
std::unique_ptr<Solution> best_sol = build(data);
// create the root node
Node root(0, 0, -1, nullptr);
while(true)
{
auto sol = std::make_unique<Solution>(data);
// Need a state representation (in order to compute next available action)
State s(data);
/////////////////////////////
///////// SELECTION /////////
/////////////////////////////
// Selection phase: start from the root and descend to a leaf following *PUCT*
Node* current_node = &root;
while(!current_node->is_leaf())
{
Node* best = nullptr;
double max_ucb = -std::numeric_limits<float>::infinity();
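// PUCT score: Q(child) + c * P(child) * sqrt(N(parent)) / (1 + N(child));
// c balances exploitation of Q against the prior-weighted exploration term.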
// compute the PUCT score of each child and keep the best one
for(auto child : current_node->children){
double ucb = child->Q + c * child->P * sqrt(current_node->N) / (1 + child->N);
if(ucb > max_ucb){
max_ucb = ucb;
best = child;
}
}
// Update state
s.update(best->action);
sol->append(best->action);
current_node = best;
}
}
////////////////////////////
///////// EXPAND /////////
////////////////////////////
// Expand phase: add the children to the leaf and compute their prior probabilities
std::vector<double> action_distribution = get_distribution(s);
for(int i = 0; i < s.actions.size(); ++i){
current_node->add_child(/*reward=*/0, action_distribution[i], s.actions[i]);
}
}
///////////////////////////////
////////// SIMULATION /////////
///////////////////////////////
// Simulation phase: extend the solution until the end
// choose the next action randomly here, as we already have the distribution
int action = choice(action_distribution);
sol->append(s.actions[action]);
s.update(s.actions[action]);
// then use the build function to complete the solution from the current state
build(data, *sol, s);
////////////////////////////
///////// BACKUP /////////
////////////////////////////
double probe_value = sol->lmax();
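// Propagate the value of the completed solution along the visited path:
// each node increments its visit count and refreshes its mean value Q = W / N.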
while(!current_node->is_root()){
current_node->update(probe_value);
current_node = current_node->parent;
}
/////////
// END //
/////////
if(sol->lmax() < best_sol->lmax())
{
best_sol = std::move(sol);
}
}
return best_sol;
}
......@@ -75,19 +98,25 @@ void backup(Node* simu_start)
/// Class Node ///
/////////////////////////
int Node::count = 0;
Node::Node(double R, double P, int action, Node* parent)
:R(R), W(0), Q(0), P(P), N(0), action(action), parent(parent)
{ id = count++; }
Node* Node::add_child(double reward, double prior_proba, int action) {
Node *n = new Node(reward, prior_proba, action, this);
children.push_back(n);
return n;
}
Node::~Node()
{
if(children.size() > 0)
{
std::cout << "Destructor" << std::endl;
}
std::list<Node*> to_delete(children.begin(), children.end());
std::list<Node*> nexts(to_delete);
......@@ -103,7 +132,7 @@ Node::~Node()
// clear the sub node vector here !!!
current->children.clear();
}
// Then delete all the collected nodes
// Then delete all the collected nodes (I don't remember why the reverse order was relevant)
for (auto it {to_delete.rbegin() }; it != to_delete.rend(); ++it)
{
delete (*it);
......@@ -111,10 +140,21 @@ Node::~Node()
children.clear();
}
bool Node::is_leaf()
{
return children.empty();
}
bool Node::is_root()
{
return parent == nullptr;
}
void Node::update(double v)
{
N++;
W += v;
Q = W/N;
}
// Node::count = 0;
std::ostream& operator<<(std::ostream& os, const Node& n)
......
......@@ -4,31 +4,16 @@
#include <iostream>
#include <typeinfo>
#include <vector>
#include <memory>
#include "instance.h"
#include "solution.h"
/**
Main function, run the algorithm on the instance in data
@param data : the instance to solve
@param c : exploration constant in the PUCT selection rule
*/
std::unique_ptr<Solution> solve(const Instance& data, double c);
class Node
{
......@@ -41,15 +26,16 @@ class Node
double Q; // Q-value estimation
double P; // Prior probability
int N; // Visit count
int action; // Action that leads from the parent to this node
std::vector<Node*> children;
Node *parent;
int id; // Identifier
Node(double R, double P, int action, Node* parent);
~Node();
/**
......@@ -57,9 +43,12 @@ class Node
* @param Prior_proba : Prior probability
* @return : Pointer to the created node
**/
Node* add_child(double reward, double prior_proba, int action);
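/**
* Record one backpropagation step: increment the visit count N, add v to the
* cumulated value W and refresh the mean value Q = W / N
* @param v : value (lmax) of the completed solution propagated from a descendant leaf
**/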
void update(double v);