Commit bcf4becd authored by Valentin Antuori

update

parent 2b92d7f6
......@@ -7,7 +7,8 @@
#include <vector>
// Instance::Instance(){}
// TODO : handle exceptions for industrial instances: no matrix in the file, missing components (holes in the component ids)
// TODO : handle exceptions for industrial instances: no matrix in the file,
// TODO : missing components (holes in the component ids)
Instance::Instance(const std::string& filename)
{
std::ifstream infile(filename);
......@@ -158,11 +159,19 @@ bool Instance::is_delivery(const int task)const
{
return type(task) == TaskType::DF || type(task) == TaskType::DE;
}
int Instance::get_pickup (const int delivery) const
bool Instance::is_full_operation(const int task) const
{
return type(task) == TaskType::PF || type(task) == TaskType::DF;
}
bool Instance::is_empty_operation(const int task) const
{
return type(task) == TaskType::PE || type(task) == TaskType::DE;
}
int Instance::get_pickup(const int delivery) const
{
return delivery-1;
}
int Instance::get_delivery (const int pickup) const
int Instance::get_delivery(const int pickup) const
{
return pickup+1;
}
......@@ -178,7 +187,15 @@ int Instance::due_date(const int task)const
{
return m_due_dates[task];
}
int Instance::get_picfull_idx(const int component, const int period)const
int Instance::get_pickfull_idx(const int component, const int period)const
{
return m_idxs[component][period];
}
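// The tasks of a component's period appear to be stored at consecutive ids starting
// at the full pickup: full pickup, full delivery (+1), empty pickup (+2), empty
// delivery (+3); hence the +2 offset in get_pickempty_idx below.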
int Instance::get_pickempty_idx(const int component, const int period)const
{
return m_idxs[component][period] + 2;
}
int Instance::get_nb_periods(const int component) const
{
return m_idxs[component].size();
}
\ No newline at end of file
......@@ -47,14 +47,22 @@ class Instance
bool is_pickup (const int task) const;
/** True if the task is a delivery **/
bool is_delivery (const int task) const;
/** True if the task is a full trolley operation **/
bool is_full_operation (const int task) const;
/** True if the task is an empty trolley operation **/
bool is_empty_operation (const int task) const;
/** return the associated delivery **/
int get_delivery (const int pickup) const;
/** return the associated pickup **/
int get_pickup (const int delivery) const;
/** return the idx of full pickup for a component and a period **/
int get_picfull_idx(const int component, const int period) const;
/** return the idx of the full pickup for a component and a period **/
int get_pickfull_idx(const int component, const int period) const;
/** return the idx of the empty pickup for a component and a period **/
int get_pickempty_idx(const int component, const int period) const;
/** Get the number of periods for this component **/
int get_nb_periods(const int component) const;
/** return a string representation of the task **/
std::string task_to_string(const int task) const;
// Keep all attributes public
......
......@@ -6,6 +6,8 @@
#include <sys/stat.h>
#include <experimental/filesystem>
#include <typeinfo>
#include <torch/torch.h>
#include "instance.h"
......@@ -96,18 +98,33 @@ int main(int argc, char **argv){
}
*/
torch::manual_seed(42);
torch::manual_seed(opt.seed);
// const auto device = torch::cuda::is_available() ? torch::kCUDA : torch::kCPU;
const auto device = torch::kCPU;
auto policy = construct_policy(device);
// auto learning_rate = pow(2, -12) / inst.nb_tasks;
// auto optimizer = torch::optim::SGD(policy->parameters(), /*lr=*/learning_rate);
auto learning_rate = 0.1;
auto optimizer = torch::optim::Adam(policy->parameters(), /*lr=*/learning_rate);
set_temperature(opt.temperature);
improve_policy(policy, data, device, optimizer, opt.nb_episode, opt.batch_size);
auto policy = construct_policy(device);
int nb_tasks_sum = 0;
for(const auto& inst : data){
nb_tasks_sum += inst.nb_tasks;
}
if(opt.adam){
auto learning_rate = 0.01;
auto optimizer = torch::optim::Adam(policy->parameters(), /*lr=*/learning_rate);
std::cout << "optimizer : " << typeid(optimizer).name() << std::endl;
set_temperature(opt.temperature);
improve_policy(policy, data, device, optimizer, opt.nb_episode, opt.batch_size);
}else{
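// SGD learning rate scaled by the (integer) average number of tasks per instance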
auto learning_rate = pow(2, -12) / (nb_tasks_sum / data.size());
auto optimizer = torch::optim::SGD(policy->parameters(), /*lr=*/learning_rate);
std::cout << "optimizer : " << typeid(optimizer).name() << std::endl;
set_temperature(opt.temperature);
improve_policy(policy, data, device, optimizer, opt.nb_episode, opt.batch_size);
}
return 0;
......
......@@ -152,6 +152,10 @@ ChariotOptions parse(int argc, char *argv[]) {
cmd.add<ValueArg<double>>(opt.temperature, "", "temperature", "temperature parameter", /*required=*/false, /*default=*/1,
"double");
cmd.add<SwitchArg>(opt.adam, "", "adam", "use adam optimizer",
false);
cmd.parse(argc, argv);
return opt;
}
......
......@@ -31,6 +31,7 @@ public:
int batch_size;
int nb_episode;
bool adam;
double time;
ChariotOptions(){};
......
......@@ -13,13 +13,16 @@
#include "solution.h"
#include "policy-gradient.h"
#define NEW_PERIOD -1
#define ENDED -2
namespace nn = torch::nn;
namespace optim = torch::optim;
using Tensor = torch::Tensor;
double temperature = 1.0;
bool debug_flag = false;
bool trace_flag = true;
bool trace_flag = false;
void set_temperature(double t)
{
......@@ -30,73 +33,92 @@ class State
{
public:
const Instance data;
int last_task;
int train_size;
int time;
int tardiness;
std::vector<int> nexts;
std::vector<int> curr_period;
std::vector<int> lst;
std::vector<bool> go_back;
bool is_valid;
State(const Instance& inst): data(inst), last_task(-1), train_size(0), time(0), tardiness(0), nexts(data.nb_tasks, 0),
lst(data.nb_tasks, 0), is_valid(true)
State(const Instance& inst): data(inst), last_task(-1), train_size(0), time(0), tardiness(0),
nexts(data.nb_components, NEW_PERIOD), curr_period(data.nb_components, 0), lst(data.nb_tasks, 0),
go_back(data.nb_components, false), is_valid(true)
{
// compute the lst (latest start time) of each component's first-period pickup
for(int comp = 0; comp < data.nb_components; ++comp)
{
int pick_idx = data.get_picfull_idx(comp, 0);
int pick_idx = data.get_pickfull_idx(comp, 0);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
nexts[comp] = pick_idx;
}
}
/** return the reward **/
int update(int comp)
int update(int task)
{
int comp = data.component(task);
int reward = 0;
int task = nexts[comp];
int dist = 0;
if(last_task > 0){
dist = data.distance(last_task, task);
}
train_size += data.train_use(task);
time = std::max(time + dist + data.duration(task), data.release_date(task));
// This task cannot be added to the current state
if(task < 0 || train_size + data.train_use(task) > data.T_max){
// std::cout << "DEPASSEMENT" << std::endl;
// reward = -50000;
is_valid = false;
// compute the "look ahead" tardiness
int tmp_tard = std::max(0, time - (lst[comp] + data.duration(task)));
if(tmp_tard > tardiness){
// the reward is the marginal increasing tardiness
reward = -(tmp_tard - tardiness);
tardiness = tmp_tard;
}
else
{
train_size += data.train_use(task);
time = std::max(time + data.distance(last_task, task) + data.duration(task), data.release_date(task));
// compute the "look ahead" tardiness
int tmp_tard = std::max(0, time - (lst[comp] + data.duration(task)));
if(tmp_tard > tardiness){
// the reward is the marginal increasing tardiness
reward = -(tmp_tard - tardiness);
tardiness = tmp_tard;
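// Advance this component's cursor: a pickup (or the first task of a new period) is
// followed by its delivery; after a delivery, either switch to the other pickup of
// the same period or, once both trolleys are handled, move on to the next period.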
if(nexts[comp] == NEW_PERIOD || data.is_pickup(task)){
nexts[comp] = task+1;
}else{
// Delivery chosen
if(go_back[comp]){
// go to the next period if it exists
curr_period[comp]++;
if(curr_period[comp] >= data.get_nb_periods(comp)){
// No new period for this component
nexts[comp] = ENDED;
}else{
nexts[comp] = NEW_PERIOD;
}
go_back[comp] = false;
}else{
go_back[comp] = true;
// next task is the other pickup
if(data.is_empty_operation(task)){
nexts[comp] = data.get_pickfull_idx(comp, curr_period[comp]);
}else{
nexts[comp] = data.get_pickempty_idx(comp, curr_period[comp]);
}
}
}
nexts[comp]++;
if(nexts[comp] >= data.nb_tasks || data.component(nexts[comp]) != comp) // this was the last task for this component
{
nexts[comp] = -1;
lst[comp] = -1;
}else
{
if(data.period(nexts[comp]) != data.period(task)) // this was the last task for this period
{
// recompute lst for this period
int pick_idx = data.get_picfull_idx(comp, data.period(nexts[comp]));
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
}else
{
//update lst
lst[comp] -= data.duration(task) + data.distance(task, task+1);
}
// then update the lst: recompute it when a new period starts; otherwise adjust it by the completed task's duration and the travel to the next task
if(nexts[comp] == NEW_PERIOD){
int pick_idx = data.get_pickfull_idx(comp, curr_period[comp]);
int durations = data.duration(pick_idx) + data.duration(pick_idx+1) + data.duration(pick_idx+2) + data.duration(pick_idx+3);
int travels = data.distance(pick_idx, pick_idx+1) + data.distance(pick_idx+1, pick_idx+2) + data.distance(pick_idx+2, pick_idx+3);
lst[comp] = data.due_date(pick_idx+3) - travels - durations;
}else if(nexts[comp] != ENDED){
lst[comp] += data.duration(task);
lst[comp] += data.distance(task, nexts[comp]);
}
last_task = task;
......@@ -105,22 +127,37 @@ class State
Tensor to_tensor()
{
mapping.clear();
mapping_act2task.clear();
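// Build the list of feasible candidate tasks: a component waiting on a new period
// offers both its full and empty pickups (feasibility is only checked against the
// full pickup's train use here); otherwise its single pending task is offered if it
// still fits on the train.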
for(int comp = 0; comp < data.nb_components; ++comp){
if(nexts[comp] >= 0 && train_size + data.train_use(nexts[comp]) <= data.T_max){
mapping.push_back(comp);
if(nexts[comp] != ENDED){
if(nexts[comp] == NEW_PERIOD){
int period = curr_period[comp];
if(train_size + data.train_use(data.get_pickfull_idx(comp, period)) <= data.T_max){
mapping_act2task.push_back(data.get_pickfull_idx(comp, period));
mapping_act2task.push_back(data.get_pickempty_idx(comp, period));
}
}else if(train_size + data.train_use(nexts[comp]) <= data.T_max){
mapping_act2task.push_back(nexts[comp]);
}
}
}
// for(auto elt : mapping_act2task){
// std::cout << elt << ", ";
// }
// std::cout << std::endl;
// int counter = 0;
int size = mapping.size();
int size = mapping_act2task.size();
torch::Tensor t = torch::empty({size, 4}, torch::kFloat);
// std::cout << "candidat : " << std::endl;
for(uint i = 0; i < mapping.size(); ++i){
int comp = mapping[i];
int task = nexts[comp];
if(trace_flag){
std::cout << "candidate : " << std::endl;
}
for(uint i = 0; i < mapping_act2task.size(); ++i){
int task = mapping_act2task[i];
int comp = data.component(task);
int dist = 0;
if(last_task > 0)
......@@ -130,48 +167,37 @@ class State
int pick_1 = data.is_pickup(task) ? 1 : 0;
t[i][0] = 1-(float)slack / data.max_slack;
t[i][1] = 1-(float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist;
t[i][2] = (float)data.trolley_length(task) / data.T_max;
t[i][3] = 1-(float)pick_1;
// std::cout << task << " ";
// t[counter++] = (float)available;
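// Four features per candidate task: slack (normalized by max_slack), travel or
// waiting time (normalized by max_dist), one minus the task's trolley length over
// T_max, and a pickup indicator.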
t[i][0] = (float)slack / data.max_slack;
t[i][1] = (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist;
t[i][2] = 1 - (float)data.trolley_length(task) / data.T_max;
t[i][3] = (float)pick_1;
if(trace_flag){
std::cout << mapping_act2task[i] << "(" << (float)slack / data.max_slack << " " << (float)std::max(dist, data.release_date(task) - (time - tardiness)) / data.max_dist
<< " " << 1-(float)data.trolley_length(task) / data.T_max << " " << pick_1 << ")" << std::endl;
}
}
// std::cout << std::endl;
return t;
}
Tensor remove_invalid_moves(const Tensor &probs) const
{
torch::Tensor t = probs.clone();
float sum = 0;
for(int i = 0; i < data.nb_components; ++i)
{
if(nexts[i] >= 0 && train_size + data.train_use(nexts[i]) <= data.T_max)
sum += probs[i].item<float>();
}
for(int i = 0; i < data.nb_components; ++i){
if(nexts[i] >= 0 && train_size + data.train_use(nexts[i]) <= data.T_max)
t[i] = probs[i].item<float>() / sum;
else
t[i] = 0;
}
return t;
}
int action_to_comp(const int action) const
int action_to_task(const int action) const
{
return mapping[action];
return mapping_act2task[action];
}
private:
vector<int> mapping;
vector<int> mapping_act2task;
};
struct Net : torch::nn::Module {
Net(int64_t N, int64_t M)
: linear(register_module("linear", torch::nn::Linear(N, M))) {
}
torch::Tensor forward(torch::Tensor input) {
return linear(input);
}
torch::nn::Linear linear;
};
......@@ -180,6 +206,7 @@ class State
nn::Sequential construct_policy(const torch::Device &device)
{
vector<float> theta({1.0034868901129597, 2.3039372263365396, 0.5926475075479015, 0.095393968026037});
// const auto state_space = data.nb_components*4;
// const auto hidden_units = data.nb_components;
// const auto action_space = data.nb_components;
......@@ -202,8 +229,11 @@ nn::Sequential construct_policy(const torch::Device &device)
int choice(const Tensor &probs)
{
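// Inverse-transform sampling: draw a uniform roll and walk the probability vector,
// subtracting each entry until the roll falls within one of them.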
auto roll = torch::rand(1).item<float>();
auto save_roll = roll;
// std::cout << roll << std::endl;
// std::cout << probs << std::endl;
for (int i = 0; i < probs.size(0); ++i) {
const auto prob = probs[i].item<float>();
if (roll <= prob)
......@@ -211,6 +241,8 @@ int choice(const Tensor &probs)
else
roll -= prob;
}
std::cerr << save_roll << " --> " << roll << std::endl;
throw("invalid probabilty tensor"s);
}
......@@ -227,19 +259,25 @@ std::tuple<int, Tensor> select_action(nn::Sequential &policy, const Tensor &stat
// std::cout << "Params :"<<std::endl << policy->parameters() << std::endl;
// std::cout << "NN out :"<<std::endl << tmp << std::endl;
const auto probs = torch::softmax(tmp/temperature, /*dim=*/0);
// std::cout << "Proba 1 : " << std::endl << probs << std::endl;
const auto probs = torch::softmax((1-tmp)/temperature, /*dim=*/0);
// const auto valid_probs = s.remove_invalid_moves(probs);
// std::cout << "Proba 2 : " << std::endl << valid_probs << std::endl;
if(display_probs)
std::cout<< probs << std::endl;
if(debug_flag){
std::cout << tmp << std::endl;
std::cout << probs << std::endl;
if(trace_flag){
std::cout << "Proba : ";
for (int i = 0; i < probs.size(0); ++i){
std::cout << probs[i].item<float>() << " " ;
}
std::cout << std::endl;
std::cout << "Score : ";
for (int i = 0; i < probs.size(0); ++i){
std::cout << tmp[i].item<float>() << " " ;
}
std::cout << std::endl;
}
std::tuple<int, Tensor> return_value;
try {
const auto action = choice(probs);
......@@ -270,6 +308,11 @@ std::tuple<Tensor, Tensor, Solution > play_episode(nn::Sequential &policy, const
for(int j = 0; j < data.nb_tasks; ++j){
if(trace_flag){
std::cout << std::endl << " ------ Step "<< j << " ------" << std::endl;
}
auto [action, log_prob] = select_action(policy, s.to_tensor().to(device), s);
// if(s.nexts[action] < 0)
......@@ -280,18 +323,24 @@ std::tuple<Tensor, Tensor, Solution > play_episode(nn::Sequential &policy, const
// log_probs.push_back(std::move(log_prob));
// break;
// }
sol.append(s.nexts[s.action_to_comp(action)]);
int reward = s.update(s.action_to_comp(action));
int selected_task = s.action_to_task(action);
sol.append(selected_task);
// std::cout << sol << std::endl;
int reward = s.update(selected_task);
if(trace_flag){
std::cout << "Choose : " << selected_task << std::endl;
// std::cout << sol << std::endl;
}
// min_reward = std::min(min_reward, reward);
rewards.push_back(reward);
log_probs.push_back(std::move(log_prob));
}
if(debug_flag){
std::cout << sol << std::endl;
// std::cout << "State : " << std::endl;
// std::cout << sol << std::endl;
}
return std::make_tuple(torch::tensor(std::move(rewards)),
......@@ -320,19 +369,28 @@ auto normalize(const Tensor &rewards) -> Tensor
void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const torch::Device &device, optim::Optimizer &optimizer,
int episodes/*=100*/, int batch_size/*=1*/)
{
for (int i = 0; i < episodes; ++i) {
// torch::autograd::GradMode::set_enabled(false);
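// The requested batch size is spread over the training instances: each step plays
// every instance real_batch_size times and accumulates the losses before one update.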
int real_batch_size = batch_size / data.size();
std::cout << "batch size = " << real_batch_size << std::endl;
for (int i = 0; i < episodes; ++i) {
// if(i == 28){
// trace_flag = true;
// }
double lmax_sum = 0;
torch::Tensor loss_batch = torch::zeros({1}, torch::kFloat);
for(int j = 0; j < batch_size; ++j){
for(int j = 0; j < real_batch_size; ++j){
if(trace_flag){
std::cout << " ========== " << std::endl;
std::cout << " ========== " << std::endl;
}
for(const auto& inst : data){
const auto [_rewards, _log_probs, _sol] = play_episode(policy, inst, device);
lmax_sum += _sol.lmax();
// std::cout << _sol.lmax() << std::endl;
// log_probs[j] = _log_probs;
// rewards.add(discount(_rewards, /*gamma=*/1));
......@@ -341,7 +399,14 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
)
.to(device);
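// REINFORCE-style loss: negative log-probability of each chosen action weighted by its return.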
auto loss_t = (-_log_probs.squeeze() * returns).sum();
if(trace_flag){
std::cout << "lmax = " << _sol.lmax() << std::endl;
std::cout << "Log prob : " << _log_probs << std::endl;
std::cout << "Returns " << returns << std::endl;
std::cout << "loss_t = " << loss_t << std::endl;
}
loss_batch.add_(loss_t);
}
......@@ -350,16 +415,28 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
optimizer.zero_grad();
auto loss = loss_batch / batch_size;
loss.backward();
auto loss = loss_batch / (real_batch_size * static_cast<int>(data.size()));
std::cout << "Step " << i << " : " << lmax_sum/(real_batch_size * static_cast<int>(data.size())) << std::endl;
std::cout << "Param : " << policy->parameters() << std::endl << std::endl;
if(debug_flag)
{
std::cout << " # # #" << std::endl;
std::cout << "Loss batch = " << loss_batch << std::endl;
std::cout << "LOSS = " << loss << std::endl;
}
loss.backward();
optimizer.step();
if(trace_flag)
if(debug_flag)
{
std::cout << "Step " << i << " : " << lmax_sum/batch_size << std::endl;
std::cout << "Param : " << policy->parameters() << std::endl;
std::cout << " ===> Param : " << policy->parameters() << std::endl;
std::cout << " --- " << std::endl;
std::cout << " # # #" << std::endl;
}
// std::cout << "Returns :" << returns1 << std::endl;
......@@ -374,8 +451,6 @@ void improve_policy(nn::Sequential &policy, const vector<Instance>& data, const
// std::cout << "Returns :" << returns1 << std::endl;
// std::cout << log_probs.squeeze() << std::endl;
// std::cout << "Returns :" << returns1 << std::endl;
// std::cout << -log_probs.squeeze() * returns1 << std::endl;
}