Commit e16e02fb authored by Valentin Antuori

update policy gradient

parent dd27f452
......@@ -10,12 +10,14 @@
#include <list>
#include <torch/torch.h>
#include "policy-gradient.h"
#include <stdlib.h>
#include <limits>
#include "instance.h"
#include "solution.h"
#include "options.h"
#include "policy-gradient.h"
#include "mcts.h"
#include "heuristic.h"
......@@ -106,21 +108,22 @@ void greedy_sample_stat(double temperature, Instance& data, int nb_iter)
set_default_temperature(temperature);
long lmax_sum = 0;
long depth_sum = 0;
int best_obj = INT_MAX;
int best_depth = INT_MIN;
int best_obj = std::numeric_limits<std::int32_t>::max();
int best_depth = std::numeric_limits<std::int32_t>::min();
for(int i = 0; i < nb_iter; ++i){
std::unique_ptr<Solution> sol = build(data);
Solution sol(&data);
build(sol);
lmax_sum += sol->lmax();
if(sol->lmax() < best_obj)
lmax_sum += sol.lmax();
if(sol.lmax() < best_obj)
{
best_obj = sol->lmax();
best_obj = sol.lmax();
}
int depth;
if(sol->lmax() > 0){
for(int j = 0; j < sol->size(); ++j){
if(sol->tardiness(j) > 0){
if(sol.lmax() > 0){
for(int j = 0; j < sol.size(); ++j){
if(sol.tardiness(j) > 0){
depth = j+1;
break;
}
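
Read together, the new version of this loop allocates the Solution on the stack and initializes the bound with std::numeric_limits instead of the C macros. A minimal sketch of how the loop reads after the change (Instance, Solution, build(), lmax(), size() and tardiness() are the project types used above; the remaining statistics and output are trimmed):

#include <limits>
#include <iostream>

// Sketch only: Instance, Solution and build() come from the project headers included above.
void greedy_sample_stat_sketch(Instance& data, int nb_iter) {
    long lmax_sum = 0;
    int best_obj = std::numeric_limits<int>::max();   // replaces INT_MAX
    for (int i = 0; i < nb_iter; ++i) {
        Solution sol(&data);        // stack allocation replaces the old unique_ptr
        build(sol);                 // the new overload fills sol in place
        lmax_sum += sol.lmax();
        if (sol.lmax() < best_obj)
            best_obj = sol.lmax();
        if (sol.lmax() > 0) {
            for (int j = 0; j < sol.size(); ++j) {
                if (sol.tardiness(j) > 0) { /* first tardy position is j + 1 */ break; }
            }
        }
    }
    std::cout << "avg lmax = " << double(lmax_sum) / nb_iter << ", best = " << best_obj << std::endl;
}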
......@@ -187,6 +190,14 @@ int main(int argc, char **argv){
const auto device = torch::kCPU;
auto policy = construct_policy(device);
std::cout << " *********************** " << std::endl << " *********************** " << std::endl;
std::cout << policy[0]->parameters() << std::endl;
std::cout << " *********************** " << std::endl << " *********************** " << std::endl;
int nb_tasks_sum = 0;
for(auto inst : data){
nb_tasks_sum += inst.nb_tasks;
......@@ -198,24 +209,26 @@ int main(int argc, char **argv){
auto optimizer = torch::optim::Adam(policy->parameters(), /*lr=*/learning_rate);
std::cout << "optimizer : " << typeid(optimizer).name() << std::endl;
set_temperature(opt.temperature);
improve_policy(policy, data, device, optimizer, opt.nb_episode, opt.batch_size);
improve_policy(policy, data, device, optimizer, opt.epoch, opt.episode);
}else{
auto learning_rate = pow(2, -12) / (nb_tasks_sum / data.size());
auto optimizer = torch::optim::SGD(policy->parameters(), /*lr=*/learning_rate);
std::cout << "optimizer : " << typeid(optimizer).name() << std::endl;
set_temperature(opt.temperature);
improve_policy(policy, data, device, optimizer, opt.nb_episode, opt.batch_size);
improve_policy(policy, data, device, optimizer, opt.epoch, opt.episode);
}
}
else if(opt.mcts)
else
if(opt.mcts)
{
std::unique_ptr<Solution> sol = solve(data[0], opt.c,/*print_stat=*/true, opt.time, opt.mcts_backup);
Solution sol = solve(data[0], opt.c,/*print_stat=*/true, opt.time, opt.mcts_backup);
}
else if(opt.heuristic)
{
std::unique_ptr<Solution> best_sol = build(data[0]);
std::cout << best_sol->lmax() << std::endl;
Solution sol(&data[0]);
build(sol);
std::cout << sol.lmax() << std::endl;
}
else{
/*
......@@ -265,17 +278,18 @@ int main(int argc, char **argv){
// sleep(3);
//random_tree();
auto sol = std::make_unique<Solution>(data[0]);
// auto sol = std::make_unique<Solution>(data[0]);
// for(int i = 0; i < 10000000; ++i)
// {
// State s(data[0]);
// auto sol2 = std::make_unique<Solution>(data[0]);
// // auto sol2 = new Solution(data[0]);
// build(data[0], *sol2, s);
// sol = std::move(sol2);
// std::cerr << "Obj=" <<sol->lmax() << std::endl;
// }
std::unique_ptr<Solution> sol;  // keeps the most recent solution; the commented-out declaration above no longer provides it
for(int i = 0; i < 10000000; ++i)
{
State s(data[0]);
auto sol2 = std::make_unique<Solution>(data[0]);
// auto sol2 = new Solution(data[0]);
build(data[0], *sol2, s);
sol = std::move(sol2);
std::cerr << "Obj=" << sol->lmax() << std::endl;
}
}
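
Pulled out of the interleaved lines above, the training branch now selects the optimizer roughly as sketched below. This is an illustration, not the file's exact code: opt.adam, opt.epoch and opt.episode come from options.h further down, the learning-rate formula is the one visible in the SGD branch (the Adam branch computes its rate above this hunk and is assumed here to use the same scaling), and the original integer division is replaced by a floating-point average for clarity.

#include <cmath>
#include <torch/torch.h>

// Sketch of the optimizer selection in main(); ChariotOptions is the options struct from options.h.
void train_sketch(torch::nn::Sequential& policy, const std::vector<Instance>& data,
                  const torch::Device& device, const ChariotOptions& opt, int nb_tasks_sum) {
    double avg_tasks = static_cast<double>(nb_tasks_sum) / data.size();
    double learning_rate = std::pow(2, -12) / avg_tasks;   // step size scaled by the average instance size
    set_temperature(opt.temperature);
    if (opt.adam) {
        torch::optim::Adam optimizer(policy->parameters(), torch::optim::AdamOptions(learning_rate));
        improve_policy(policy, data, device, optimizer, opt.epoch, opt.episode);
    } else {
        torch::optim::SGD optimizer(policy->parameters(), torch::optim::SGDOptions(learning_rate));
        improve_policy(policy, data, device, optimizer, opt.epoch, opt.episode);
    }
}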
......
......@@ -143,10 +143,10 @@ ChariotOptions parse(int argc, char *argv[]) {
cmd.add<ValueArg<double>>(opt.time, "", "time", "time limit in seconds", false, 3600,
"double");
cmd.add<ValueArg<double>>(opt.batch_size, "", "batch", "size of the batch", /*required=*/false, /*default=*/1,
cmd.add<ValueArg<double>>(opt.episode, "", "episode", "number of episodes per epoch", /*required=*/false, /*default=*/1,
"int");
cmd.add<ValueArg<double>>(opt.nb_episode, "", "episode", "number of episode", /*required=*/false, /*default=*/1000,
cmd.add<ValueArg<double>>(opt.epoch, "", "epoch", "number of epochs", /*required=*/false, /*default=*/1000,
"int");
cmd.add<ValueArg<double>>(opt.temperature, "", "temperature", "temperature parameter", /*required=*/false, /*default=*/1,
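
Both renamed options are still registered as ValueArg<double> even though opt.episode and opt.epoch are ints and their type string says "int". Assuming ValueArg is TCLAP's class (the project's cmd.add<> wrapper is not shown in this diff), a stand-alone illustration of the two renamed flags with matching integer types:

#include <tclap/CmdLine.h>
#include <iostream>

int main(int argc, char** argv) {
    // Placeholder program name/description; only the flag names and defaults come from the diff.
    TCLAP::CmdLine cmd("policy-gradient trainer", ' ', "0.1");
    TCLAP::ValueArg<int> episode("", "episode", "number of episodes per epoch", false, 1, "int", cmd);
    TCLAP::ValueArg<int> epoch("", "epoch", "number of epochs", false, 1000, "int", cmd);
    cmd.parse(argc, argv);
    std::cout << "epoch=" << epoch.getValue() << " episode=" << episode.getValue() << std::endl;
    return 0;
}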
......
......@@ -28,8 +28,8 @@ public:
bool print_cmd;
double temperature;
int batch_size;
int nb_episode;
int epoch;
int episode;
bool learn;
bool adam;
......
......@@ -198,7 +198,7 @@ class State_rl
to_vector_for1 += ((std::chrono::duration<double>)(std::chrono::steady_clock::now()-tick)).count();
tick = std::chrono::steady_clock::now();
auto options = torch::TensorOptions().dtype(at::kFloat);
auto options = torch::TensorOptions().dtype(at::kFloat); // .requires_grad(false) is unnecessary: requires_grad already defaults to false
auto tensor = torch::from_blob(v.data(), {1, size*4}, options).clone();
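
The added comment is correct: torch::TensorOptions() leaves requires_grad false by default. The .clone() after from_blob is the important part, since from_blob only wraps the caller's buffer without copying; a minimal stand-alone illustration:

#include <torch/torch.h>
#include <iostream>
#include <vector>

int main() {
    std::vector<float> v(8, 1.0f);                                      // buffer owned by the caller
    auto options = torch::TensorOptions().dtype(torch::kFloat);         // requires_grad is false by default
    auto tensor = torch::from_blob(v.data(), {1, 8}, options).clone();  // clone() copies, so the tensor
                                                                        // ignores later changes to v
    v[0] = 42.0f;
    std::cout << tensor << std::endl;                                   // still all ones
    return 0;
}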
......@@ -412,13 +412,13 @@ auto normalize(const Tensor &rewards) -> Tensor
// Improve the policy utilizing the policy gradient algorithm.
void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const torch::Device &device, optim::Optimizer &optimizer,
int episodes/*=100*/, int batch_size/*=1*/)
int epoch/*=2000*/, int episode/*=140*/)
{
// torch::autograd::GradMode::set_enabled(false);
int real_batch_size = batch_size / data.size();
std::cout << "batch size = " << real_batch_size << std::endl;
int episode_per_instances = episode / data.size();
std::cout << "episode per instances = " << episode_per_instances << std::endl;
for (int i = 0; i < episodes; ++i) {
for (int i = 0; i < epoch; ++i) {
std::cout << std::endl <<" ------------------- " << std::endl;
// if(i == 28){
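
With the renaming, each outer iteration is an epoch that runs episode/data.size() rollouts on every instance and accumulates their losses before a single gradient step. A hedged structural sketch of that loop; rollout_loss() is a placeholder for the per-episode rollout and REINFORCE loss that these hunks elide:

#include <torch/torch.h>

// Placeholder for one episode's loss; the real code builds a solution with the policy
// and combines the stored log-probabilities with the (normalized) returns.
torch::Tensor rollout_loss() { return torch::rand({1}, torch::requires_grad()); }

void epoch_structure_sketch(int epoch, int episode, int nb_instances) {
    int episode_per_instance = episode / nb_instances;       // mirrors episode_per_instances above
    for (int i = 0; i < epoch; ++i) {
        torch::Tensor loss_epoch = torch::zeros({1}, torch::kFloat);
        for (int inst = 0; inst < nb_instances; ++inst)
            for (int j = 0; j < episode_per_instance; ++j)
                loss_epoch.add_(rollout_loss());              // accumulate one episode's loss
        // loss_epoch is then averaged and sent through zero_grad()/backward()/step(),
        // as in the hunks further down.
    }
}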
......@@ -426,10 +426,10 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
// }
double lmax_sum = 0;
torch::Tensor loss_batch = torch::zeros({1}, torch::kFloat);
torch::Tensor loss_epoch = torch::zeros({1}, torch::kFloat);
std::chrono::duration<double> loss_computation(0);
for(int j = 0; j < real_batch_size; ++j){
for(int j = 0; j < episode_per_instances; ++j){
if(trace_flag){
std::cout << " ========== " << std::endl;
std::cout << " ========== " << std::endl;
......@@ -456,7 +456,7 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
std::cout << "Returns " << returns << std::endl;
std::cout << "loss_t = " << loss_t << std::endl;
}
loss_batch.add_(loss_t);
loss_epoch.add_(loss_t);
auto end = std::chrono::steady_clock::now();
loss_computation+= (end - begin);
......@@ -477,9 +477,9 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
auto begin = std::chrono::steady_clock::now();
optimizer.zero_grad();
auto loss = loss_batch / (real_batch_size * static_cast<int>(data.size()));
auto loss = loss_epoch / (episode * static_cast<int>(data.size()));
std::cout << "Step " << i << " : " << lmax_sum/(real_batch_size * static_cast<int>(data.size())) << std::endl;
std::cout << "Epoch " << i << " : " << lmax_sum/(episode * static_cast<int>(data.size())) << std::endl;
std::cout << "Param : " << policy->parameters() << std::endl << std::endl;
std::cout << "Backward..." << std::endl;
......@@ -498,7 +498,7 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
if(debug_flag)
{
std::cout << " # # #" << std::endl;
std::cout << "Loss batch = " << loss_batch << std::endl;
std::cout << "Loss epoch = " << loss_epoch << std::endl;
std::cout << "LOSS = " << loss << std::endl;
}
......
......@@ -13,7 +13,7 @@ extern bool trace_flag;
void set_temperature(double t);
torch::nn::Sequential construct_policy(const torch::Device &device);
void improve_policy(torch::nn::Sequential &policy, const std::vector<Instance>& data, const torch::Device &device, torch::optim::Optimizer &optimizer,
int episodes = 100, int batch_size = 1);
int epoch = 2000, int episode = 140);
#endif
\ No newline at end of file
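
For callers, the new defaults mean a bare call trains for 2000 epochs of 140 episodes each; hypothetical call sites (variable names as in main() above):

improve_policy(policy, data, device, optimizer);                          // epoch = 2000, episode = 140
improve_policy(policy, data, device, optimizer, opt.epoch, opt.episode);  // values parsed from --epoch / --episode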