Commit ab49b87f authored by Valentin Antuori

MCTS pointer version

parent 886c7bc4
......@@ -40,7 +40,7 @@ target_link_libraries(main "${TORCH_LIBRARIES}")
set_target_properties(main PROPERTIES
CXX_STANDARD 17
CXX_STANDARD 14
CXX_STANDARD_REQUIRED ON
CXX_EXTENSIONS ON
)
......
......@@ -83,36 +83,40 @@ void build(Solution& sol, State& s)
for(int i = sol.size(); i < sol.data->nb_tasks; ++i)
{
if(DEBUG_H)
std::cerr << "step "<< i << std::endl;
append_action(sol, s);
}
if(DEBUG_H)
std::cerr << "End Build" << std::endl;
}
vector<int> actions = s.get_actions();
void append_action(Solution& sol, State& s)
{
vector<int> actions = s.get_actions();
if(DEBUG_H)
std::cerr << "Candidates : " << std::endl;
if(DEBUG_H)
std::cerr << "Candidates : " << std::endl;
// compute proba;
vector<double> proba = get_distribution(*sol.data, s, actions);
if(DEBUG_H)
for(size_t i = 0; i < proba.size(); ++i){
std::cerr << actions[i] << " : " << proba[i] << std::endl;
}
// compute proba;
vector<double> proba = get_distribution(*sol.data, s, actions);
if(DEBUG_H)
for(size_t i = 0; i < proba.size(); ++i){
std::cerr << actions[i] << " : " << proba[i] << std::endl;
}
int idx = choice(proba);
int idx = choice(proba);
sol.append(actions[idx]);
s.update(actions[idx]);
sol.append(actions[idx]);
s.update(actions[idx]);
if(DEBUG_H){
std::cerr << s << std::endl;
std::cerr << sol << std::endl;
std::cerr << " --- " << std::endl;
}
if(DEBUG_H){
std::cerr << s << std::endl;
std::cerr << sol << std::endl;
std::cerr << " --- " << std::endl;
}
if(DEBUG_H)
std::cerr << "End Build" << std::endl;
}
......
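The refactor above pulls a single greedy step out of `build` into `append_action`: query the candidate actions, get their probabilities from `get_distribution`, draw one index with `choice`, then append it to the solution and update the state. A minimal sketch of how a `choice`-style weighted draw over such a probability vector could look (the use of `std::discrete_distribution` is an assumption; the real `choice` may be implemented differently):

```cpp
#include <random>
#include <vector>

// Hedged sketch: draw an index according to the weights in `proba`.
// Weights only need to be non-negative; std::discrete_distribution
// normalizes them internally. Not thread-safe because of the static RNG.
int choice(const std::vector<double>& proba)
{
    static std::mt19937 rng(std::random_device{}());
    std::discrete_distribution<int> dist(proba.begin(), proba.end());
    return dist(rng);
}
```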
......@@ -24,6 +24,11 @@ void build(Solution& sol);
*/
void build(Solution& sol, State& state);
/**
* One iteration
*/
void append_action(Solution& sol, State& state);
/**
* Perform a random sample
*/
......
......@@ -21,6 +21,8 @@
#include "mcts.h"
#include "heuristic.h"
#include "test.h"
#include <unistd.h>
......@@ -31,11 +33,11 @@ void random_tree()
{
std::cout << " =============== " << std::endl;
Node::count = 0;
Node root(0, 0, -1, 0, nullptr);
Node root(0, -1, 0, nullptr);
// vector<Node*> nodes_vector;
// nodes_vector.push_back(&root);
for(auto i = 0; i < 10; ++i){
root.add_child(/*reward=*/0, 0, 0, 0);
root.add_child(0, 0, 0);
// nodes_vector.push_back(root.children.back());
}
{
......@@ -52,7 +54,7 @@ void random_tree()
int nb_children = rand() % 8 + 3;
for(int i = 0; i < nb_children; ++i)
{
current->add_child(/*reward=*/0, 0, 0, 0);
current->add_child(0, 0, 0);
nexts.push_back(current->children.back());
// nodes_vector.push_back(current->children.back());
nb_node++;
......@@ -85,21 +87,19 @@ void random_tree()
{
const int MAX_NODE = 4491163 + 247541 + 434232 + 786651 + 396212;
int nb_node = 1;
// int nb_node = 1;
for(int i = 0; i < MAX_NODE; ++i)
root.add_child(0, 0, 0, 0);
root.add_child(0, 0, 0);
}
sleep(5);
std::cout << "Adding a child to a new node" << std::endl;
root.children.back()->add_child(0, 0, 0, 0);
root.children.back()->add_child(0, 0, 0);
std::cout << std::endl;
std::cout << "Root has " << root.children.size() << " children" << std::endl;
delete root.children.back()->children[0];
std::cout << "Root has " << root.children.size() << " children" << std::endl;
}
void greedy_sample_stat(double temperature, Instance& data, int nb_iter)
......@@ -108,7 +108,7 @@ void greedy_sample_stat(double temperature, Instance& data, int nb_iter)
set_default_temperature(temperature);
long lmax_sum = 0;
long depth_sum = 0;
int best_obj = std::numeric_limits<std::int32_t>::max();;
int best_obj = std::numeric_limits<std::int32_t>::max();
int best_depth = std::numeric_limits<std::int32_t>::min();
for(int i = 0; i < nb_iter; ++i){
Solution sol(&data);
......@@ -120,7 +120,7 @@ void greedy_sample_stat(double temperature, Instance& data, int nb_iter)
best_obj = sol.lmax();
}
int depth;
int depth = 0;
if(sol.lmax() > 0){
for(int j = 0; j < sol.size(); ++j){
if(sol.tardiness(j) > 0){
......@@ -231,66 +231,10 @@ int main(int argc, char **argv){
std::cout << sol.lmax() << std::endl;
}
else{
/*
int NB_ITER = 1000;
{
std::cout << "Random sample" << std::endl;
long lmax_sum = 0;
long depth_sum = 0;
int best_obj = INT_MAX;
int best_depth = INT_MIN;
for(int i = 0; i < NB_ITER; ++i){
std::unique_ptr<Solution> sol = random_sample(data[0]);
lmax_sum += sol->lmax();
if(sol->lmax() < best_obj)
{
best_obj = sol->lmax();
}
int depth;
if(sol->lmax() > 0){
for(int j = 0; j < sol->size(); ++j){
if(sol->tardiness(j) > 0){
depth = j+1;
break;
}
}
}
depth_sum += depth;
if(depth > best_depth)
{
best_depth = depth;
}
}
std::cout << NB_ITER << std::endl << "- lmax = " << (double)lmax_sum/NB_ITER << std::endl;
std::cout << "- depth = " << (double)depth_sum/NB_ITER << std::endl;
std::cout << "- Best = " << best_obj << std::endl;
std::cout << "- Best depth = " << best_depth << std::endl;
}
greedy_sample_stat(1, data[0], NB_ITER);
greedy_sample_stat(0.1, data[0], NB_ITER);
greedy_sample_stat(0.01, data[0], NB_ITER);
*/
// test tree structure
// sleep(3);
//random_tree();
// auto sol = std::make_unique<Solution>(data[0]);
// for(int i = 0; i < 10000000; ++i)
// {
// State s(data[0]);
// auto sol2 = std::make_unique<Solution>(data[0]);
// // auto sol2 = new Solution(data[0]);
// build(data[0], *sol2, s);
// sol = std::move(sol2);
// std::cerr << "Obj=" <<sol->lmax() << std::endl;
// }
set_default_temperature(0.01);
// test_sample(data[0], opt.stat);
// solve_by_sample(data[0], opt.stat);
range_children(data[0], opt.stat);
}
// std::cout << "Fin" << std::endl;
......
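The `greedy_sample_stat` hunk fixes an uninitialized `depth` (the same pattern appears in the commented-out block removed from `main`). A hedged sketch of that statistic as a standalone helper: the 1-based position of the first tardy task, or 0 when the solution is on time. The helper name `first_tardy_position` is mine; `Solution` is assumed to expose `size()`, `lmax()` and `tardiness(j)` as in the diff:

```cpp
// Hedged sketch of the depth statistic used in greedy_sample_stat:
// the 1-based index of the first task with positive tardiness,
// or 0 when the whole solution is on time.
int first_tardy_position(const Solution& sol)
{
    int depth = 0;                      // initialised, unlike the old code
    if (sol.lmax() > 0) {
        for (int j = 0; j < sol.size(); ++j) {
            if (sol.tardiness(j) > 0) {
                depth = j + 1;
                break;
            }
        }
    }
    return depth;
}
```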
......@@ -26,6 +26,11 @@ Solution solve(const Instance& data, double c, bool print_stat, int timeout, int
*/
int samples(Node& current_node, const Solution& sol, const State& s, Solution& best_sol);
/**
* Unique sample from partial solution and state
*/
void sample(Solution& sol, State& s);
class Node
{
......@@ -33,19 +38,14 @@ class Node
static int count;
/**/
double R; // Immediate reward on the edge from parent to this (0 for lmax)
double W; // Sum of Rewards
//double avg_W; // Q-value estimation
double P; // Prior probability
int N; // Visit count
double dyna_std_dev;
double dyna_variance_N; // = variance * N
double dyna_mean;
double mean_lmax;
double mean_depth;
// Use in static way
double std_dev;
double mean;
int best_obj;
int action; // Action taken by the parent
int end_date; //end date of the task
......@@ -55,9 +55,14 @@ class Node
int id; // Identifier
int depth;
Node(double R, double P, int action, int end_date, Node* parent);
int max_depth; // maximum depth under this node
int nb_nodes_subtree; // Size of the subtree
double Q;
// Node();
Node(double proba, int action, int end_date, Node* parent);
Node(Node&& n) noexcept;
Node(const Node&);
~Node();
/**
......@@ -65,18 +70,23 @@ class Node
* @param Prior_proba : Prior probability
* @return : Pointer to the created node
**/
void add_child(double reward, double prior_proba, int action, int end_date);
void add_child(double prior_proba, int action, int end_date);
void add_child(Node* child);
void update(double v);
void update(double v, const Solution& sol, int sub_tree_depth, int added_node);
bool is_leaf();
bool is_root();
double get_Q();
void delete_node();
friend std::ostream& operator<<(std::ostream& os, const Node& dt);
private :
// boolean for destructor should be false, exepct when the node will be deleted
// boolean for the destructor; should be false, except when the node is going to be deleted
bool is_collected;
};
......
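The reworked header keeps, per node, a prior `P`, a visit count `N`, accumulated reward `W` and a value estimate `Q`, and adds subtree statistics (`max_depth`, `nb_nodes_subtree`). A hedged sketch of how a PUCT-style child selection could combine these fields; this is the standard formula, not necessarily the exact rule used in `mcts.cpp`, and it assumes `children`, `Q`, `P` and `N` are accessible as suggested by the header and tests (`c` is the exploration constant passed to `solve`):

```cpp
#include <cmath>
#include <limits>

// Hedged sketch of a PUCT-style selection over a node's children.
// The backup/selection rule actually implemented in mcts.cpp may differ.
Node* select_child(const Node& parent, double c)
{
    Node* best = nullptr;
    double best_score = -std::numeric_limits<double>::infinity();
    for (Node* child : parent.children) {
        // Exploration bonus grows with the parent's visits and the prior,
        // and shrinks as the child itself gets visited.
        double u = c * child->P
                 * std::sqrt(static_cast<double>(parent.N)) / (1.0 + child->N);
        double score = child->Q + u;
        if (score > best_score) {
            best_score = score;
            best = child;
        }
    }
    return best;
}
```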
......@@ -167,6 +167,11 @@ ChariotOptions parse(int argc, char *argv[]) {
"double");
cmd.add<ValueArg<int>>(
opt.stat, "", "stat",
"stat to display (0:mean,1:median,2:decile", false,
0, "int");
cmd.add<ValueArg<int>>(
opt.mcts_backup, "b", "backup",
"backup value (0:mean,1:best", false,
......
......@@ -39,6 +39,8 @@ public:
double c;
int mcts_backup;
int stat;
double time;
ChariotOptions(){};
......
......@@ -248,6 +248,8 @@ nn::Sequential construct_policy(const torch::Device &device)
// const auto action_space = data.nb_components;
auto policy = nn::Sequential(
// nn::Linear(nn::LinearOptions(4, 4).bias(false)),
// nn::Functional(torch::relu),
nn::Linear(nn::LinearOptions(4, 1).bias(false))
);
......@@ -282,12 +284,12 @@ int choice(const Tensor &probs)
bool display_probs = false;
std::tuple<int, Tensor> select_action(nn::Sequential &policy, const Tensor &state, const State_rl& s)
{
auto tick = std::chrono::steady_clock::now();
auto tick = std::chrono::steady_clock::now();
const auto tmp = policy->forward(state);
_forward_forward += ((std::chrono::duration<double>)(std::chrono::steady_clock::now() - tick)).count();
tick = std::chrono::steady_clock::now();
const auto probs = torch::softmax((1-tmp)/temperature, /*dim=*/0);
const auto probs = torch::softmax((tmp), /*dim=*/0);
_forward_softmax += ((std::chrono::duration<double>)(std::chrono::steady_clock::now() - tick)).count();
......@@ -346,7 +348,7 @@ std::tuple<Tensor, Tensor, Solution > play_episode(nn::Sequential &policy, const
}
tick = std::chrono::steady_clock::now();
auto state_tensor = s.to_tensor().to(device);
auto state_tensor = s.to_tensor();
to_tensor_time += (std::chrono::steady_clock::now() - tick);
tick = std::chrono::steady_clock::now();
......@@ -417,7 +419,8 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
// torch::autograd::GradMode::set_enabled(false);
int episode_per_instances = episode / data.size();
std::cout << "episode per instances = " << episode_per_instances << std::endl;
// torch::nn::no_grad(policy->parameters();
// policy->parameters().requires_grad(false);
for (int i = 0; i < epoch; ++i) {
std::cout << std::endl <<" ------------------- " << std::endl;
......@@ -477,9 +480,9 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
auto begin = std::chrono::steady_clock::now();
optimizer.zero_grad();
auto loss = loss_epoch / (episode * static_cast<int>(data.size()));
auto loss = loss_epoch / (static_cast<int>(episode_per_instances * data.size()));
std::cout << "Epoch " << i << " : " << lmax_sum/(episode * static_cast<int>(data.size())) << std::endl;
std::cout << "Epoch " << i << " : " << lmax_sum/(static_cast<int>(episode_per_instances * data.size())) << std::endl;
std::cout << "Param : " << policy->parameters() << std::endl << std::endl;
std::cout << "Backward..." << std::endl;
......
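In `select_action` the hunk drops the temperature-scaled `(1-tmp)` transform and feeds the raw policy output straight to the softmax before sampling an index. A hedged sketch of such a draw with libtorch (the helper name `sample_index` is mine, and whether `choice(probs)` is implemented with `torch::multinomial` or a hand-rolled draw is an assumption):

```cpp
#include <torch/torch.h>

// Hedged sketch: sample one action index from a 1-D tensor of scores,
// mirroring what select_action does after the forward pass.
int sample_index(const torch::Tensor& logits)
{
    const auto probs = torch::softmax(logits, /*dim=*/0);
    const auto idx = torch::multinomial(probs, /*num_samples=*/1);
    return idx.item<int>();
}
```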
......@@ -7,12 +7,14 @@
Solution::Solution(const Instance* inst): data(inst),sequence(data->nb_tasks, -1),m_positions(data->nb_tasks, -1),
m_starts(data->nb_tasks, 0),m_max_tardiness(data->nb_tasks, 0),m_trains(data->nb_tasks, 0),m_size(0)
m_starts(data->nb_tasks, 0),m_max_tardiness(data->nb_tasks, 0),m_trains(data->nb_tasks, 0),m_size(0), m_sum_tardiness(0),
m_first_tardiness(-1)
{
m_trains[0] = 0;
}
Solution::Solution(const Solution& sol) : data(sol.data),sequence(sol.sequence), m_positions(sol.m_positions),
m_starts(sol.m_starts),m_max_tardiness(sol.m_max_tardiness),m_trains(sol.m_trains),m_size(sol.m_size)
m_starts(sol.m_starts),m_max_tardiness(sol.m_max_tardiness),m_trains(sol.m_trains),m_size(sol.m_size),
m_sum_tardiness(sol.m_sum_tardiness),m_first_tardiness(sol.m_first_tardiness)
{ }
int Solution::size() const
......@@ -96,22 +98,12 @@ void Solution::append(const int task)
{
m_trains[size()+1] = train(size()) + data->train_use(task);
}
// std::cout<<" Start : ";
// for(auto t : m_starts){
// std::cout << t << " ";
// }
// std::cout << std::endl;
// std::cout<<" Lateness : ";
// for(auto t : m_max_tardiness){
// std::cout << t << " ";
// }
// std::cout << std::endl;
// std::cout<<" Train : ";
// for(auto t : m_trains){
// std::cout << t << " ";
// }
// std::cout << std::endl;
m_sum_tardiness += tardiness(m_size);
if(m_first_tardiness < 0 && m_sum_tardiness > 0)
{
m_first_tardiness = m_size;
}
m_size++;
}
......@@ -201,6 +193,15 @@ int Solution::is_relax_valid() const
return Lmax;
}
int Solution::first_tardiness() const
{
return m_first_tardiness;
}
int Solution::sum_tardiness() const
{
return m_sum_tardiness;
}
void Solution::complete_print() const
{
int time = 0;
......
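The new accumulators are maintained incrementally in `append`: `m_sum_tardiness` adds the tardiness of the task just placed, and `m_first_tardiness` records the position of the first tardy task, staying at -1 while the solution is on time. A short hedged usage sketch of the new accessors (instance loading is elided; `Instance`, `State`, `Solution` and `build` are taken from the diff):

```cpp
// Hedged usage sketch: build a solution greedily and query the new
// tardiness accessors. `data` is an already-loaded Instance.
Solution sol(&data);
State s(data);
build(sol, s);
std::cout << "Lmax             : " << sol.lmax() << '\n'
          << "Sum of tardiness : " << sol.sum_tardiness() << '\n'
          << "First tardy pos  : " << sol.first_tardiness() << '\n';
```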
......@@ -54,6 +54,9 @@ class Solution
void complete_print() const;
int first_tardiness() const;
int sum_tardiness() const;
friend std::ostream& operator << (std::ostream& out, const Solution& sol);
......@@ -70,6 +73,8 @@ class Solution
private:
/** size of the solution **/
int m_size;
int m_sum_tardiness;
int m_first_tardiness;
};
class State
......