Commit 778fa18b authored by ehebrard's avatar ehebrard
Browse files

DFS rollouts

parent 705bf4fd
#include <assert.h>
#include <algorithm>
#include <iostream>
#include "DFSRollout.h"
using namespace std;
// Earliest completion time of the current period of op's component if op
// starts at start[op]: op itself, the delivery paired with op when op is a
// pickup, and, when at most one operation of the period has been committed,
// the opposite pickup/delivery pair as well.
int DFSRollout::component_ect(const int op) const
{
  const auto comp{data.component(op)};
  auto finish{start[op] + data.duration(op)};
  auto last_stop{op};

  // a pickup is necessarily followed by its own delivery
  if(data.is_pickup(op)) {
    last_stop = data.get_delivery(op);
    finish += (data.distance(op, last_stop));
    finish += data.duration(last_stop);
  }

  if(num_operation[comp] > 1)
    return finish;

  // the other pickup/delivery pair of the period is still to be done
  const auto other_pick{data.is_full_operation(op) ? data.get_pickempty_idx(comp, period[comp]) : data.get_pickfull_idx(comp, period[comp])};
  finish += (data.distance(last_stop, other_pick));
  finish += data.duration(other_pick);
  last_stop = data.get_delivery(other_pick);
  finish += (data.distance(other_pick, last_stop));
  finish += data.duration(last_stop);
  return finish;
}
// Heuristic score of a candidate action; callers treat larger values as
// better. It is 1 minus a THETA-weighted sum of four terms:
//   lambda1: slack between the due date and the component's earliest
//            completion time, divided by data.max_slack
//   lambda2: gap between the action's start and the current tour length,
//            divided by data.max_dist
//   lambda3: 1 - trolley length / data.T_max
//   lambda4: 1 when the action is a pickup, 0 otherwise
// (An earlier state-based formulation of the same four terms used to be kept
// here as commented-out code.)
double DFSRollout::fitness_func(const int action) const
{
  const int ect{component_ect(action)};

  const double lambda1 = static_cast<double>(data.due_date(action) - ect) / static_cast<double>(data.max_slack);
  const double lambda2 = static_cast<double>(start[action] - tour_length) / static_cast<double>(data.max_dist);
  const double lambda3 = 1 - (static_cast<double>(data.trolley_length(action)) / static_cast<double>(data.T_max));
  const double lambda4 = static_cast<double>(data.is_pickup(action));

  const double weighted = THETA_1 * lambda1 + THETA_2 * lambda2 + THETA_3 * lambda3 + THETA_4 * lambda4;
  return 1 - weighted;
}
// Builds an empty rollout over the given instance: per-component and per-task
// tables are sized and zeroed (prev_operation to -1), the tardiness stack
// gets its initial 0 entry, and the random generator is seeded with a fixed
// value.
DFSRollout::DFSRollout(Instance& data) : data(data) {
  // per-task tables
  start.assign(data.nb_tasks, 0);
  distance.assign(data.nb_tasks, 0);
  proba.assign(data.nb_tasks, 0);

  // per-component tables
  period.assign(data.nb_components, 0);
  num_operation.assign(data.nb_components, 0);
  prev_operation.assign(data.nb_components, -1);

  maximum_tardiness.push_back(0);
  random_generator.seed(12345);
}
void DFSRollout::clear() {
sequence.clear();
std::fill(period.begin(), period.end(), 0);
std::fill(num_operation.begin(), num_operation.end(), 0);
std::fill(prev_operation.begin(), prev_operation.end(), -1);
maximum_tardiness.clear();
maximum_tardiness.push_back(0);
tour_length = 0;
train_length = 0;
}
// Debugging aid: checks the internal state and, on the first violation,
// prints a diagnostic prefixed by msg and terminates the program with exit(1).
//   msg    - label identifying the call site in the diagnostic
//   offset - number of extra entries expected on the tardiness stack on top
//            of the usual sequence.size() + 1
// Checked: the tardiness stack length; then, for every operation after the
// first, the cumulated train use against data.T_max, the absence of overlap
// with its predecessor, and that its tardiness does not exceed the recorded
// maximum.
void DFSRollout::verify(const char* msg, const int offset) {
  const auto expected_size{sequence.size() + 1 + offset};
  if(maximum_tardiness.size() != expected_size) {
    // report the size that was actually expected, offset included
    cout << msg << ", tardiness stack length: " << (maximum_tardiness.size()) << " != " << expected_size << endl;
    exit(1);
  }
  // nothing else to check on an empty sequence (sequence[0] does not exist)
  if(sequence.empty())
    return;
  auto tl{data.train_use(sequence[0])};
  for(size_t i{1}; i<sequence.size(); ++i) {
    int x{sequence[i-1]};
    int y{sequence[i]};
    tl += data.train_use(sequence[i]);
    if(tl > data.T_max) {
      cout << msg << ", train length: " << tl << " > " << data.T_max << endl;
      exit(1);
    }
    if(start[y] < start[x] + data.duration(x) + data.distance(x,y)) {
      cout << msg << ", overlap: " << start[y] << " < " << (start[x] + data.duration(x) + data.distance(x,y)) << endl;
      exit(1);
    }
    if(start[y] + data.duration(y) - data.due_date(y) > maximum_tardiness[i+1]) {
      cout << msg << ", wrong tardiness for " << y << ": " << (start[y] + data.duration(y) - data.due_date(y))
           << " > " << maximum_tardiness[i+1] << endl;
      exit(1);
    }
  }
}
// True when adding the trolleys of operation i to the current train keeps it
// within data.T_max.
bool DFSRollout::trainOk(const int i) const {
  const auto length_with_i{train_length + data.trolley_length(i)};
  return length_with_i <= data.T_max;
}
void DFSRollout::get_operations() {
actions.clear();
int p;
for(int i{0}; i<data.nb_components; ++i) {
if(period[i] == data.get_nb_periods(i))
continue;
if(num_operation[i] == 0) {
p = data.get_pickfull_idx(i, period[i]);
if(trainOk(p))
actions.push_back(p);
p = data.get_pickempty_idx(i, period[i]);
if(trainOk(p))
actions.push_back(p);
} else if(data.is_pickup(prev_operation[i])) {
actions.push_back(data.get_delivery(prev_operation[i]));
} else {
assert(num_operation[i] == 2);
if(data.is_full_operation(prev_operation[i])) {
p = data.get_pickempty_idx(i, period[i]);
if(trainOk(p))
actions.push_back(p);
} else {
p = data.get_pickfull_idx(i, period[i]);
if(trainOk(p))
actions.push_back(p);
}
}
}
}
void DFSRollout::get_distribution(const double temperature)
{
double sum_exp_fitness = 0;
fitnesses.resize(actions.size());
exp_fitness.resize(actions.size());
double max_fit = -std::numeric_limits<float>::infinity();
// Need Max fitness for soft max regularization
for(auto i{0}; i < actions.size(); ++i)
{
//compute fitness for action i
double fitness = fitness_func(actions[i]);
max_fit = std::max(fitness, max_fit);
fitnesses[i] = fitness;
}
for(size_t i = 0; i < actions.size(); ++i)
{
exp_fitness[i] = exp((fitnesses[i] - max_fit) / temperature);
sum_exp_fitness += exp_fitness[i];
}
double tp{0};
for(auto i{0}; i < actions.size(); ++i)
{
proba[actions[i]] = static_cast<long int>((exp_fitness[i] / sum_exp_fitness) * precision);
tp += proba[actions[i]];
}
std::sort(actions.begin(), actions.end(), [&](const int a, const int b) {return proba[a] < proba[b];});
auto i{actions.size()};
while(tp < precision and i) {
++proba[actions[--i]];
++tp;
}
}
// Returns the operation of op's component that precedes op, or -1 when op is
// the very first operation of that component. Reads num_operation[] and
// period[] as they are while op is still counted as committed.
int DFSRollout::get_previous_operations(const int op) const {
  const int comp{data.component(op)};

  if(num_operation[comp] != 1) {
    // a delivery is preceded by its own pickup
    if(!data.is_pickup(op))
      return data.get_pickup(op);
    // a pickup is preceded by the delivery of the opposite pickup
    if(data.is_full_operation(op))
      return data.get_delivery(data.get_pickempty_idx(comp, period[comp]));
    return data.get_delivery(data.get_pickfull_idx(comp, period[comp]));
  }

  // op opened its period
  if(!period[comp])
    return -1;

  // predecessor is whichever delivery of the previous period started last
  const int full_delivery{data.get_delivery(data.get_pickfull_idx(comp, period[comp]-1))};
  const int empty_delivery{data.get_delivery(data.get_pickempty_idx(comp, period[comp]-1))};
  if(start[full_delivery] > start[empty_delivery])
    return full_delivery;
  return empty_delivery;
}
// Computes, for candidate op appended after the last operation of the
// sequence, its start time start[op] and the time distance[op] elapsed since
// the current tour length (travel, or waiting until the release date if that
// is later). Returns the resulting lateness start + duration - due date,
// which may be negative. Requires a non-empty sequence.
int DFSRollout::compute_distance(const int op) {
  const auto from{sequence.back()};
  const int travel = data.distance(from, op);
  const int release = data.release_date(op);
  const int arrival = tour_length + travel;

  if(arrival > release) {
    // arriving after the release date: start on arrival
    start[op] = arrival;
    distance[op] = travel;
  } else {
    // must wait for the release date: the wait counts as distance
    start[op] = release;
    distance[op] = release - tour_length;
  }
  return start[op] + data.duration(op) - data.due_date(op);
}
void DFSRollout::compute_distances() {
if(sequence.size() == 0) {
maximum_tardiness.push_back(0);
} else {
auto max_tardiness{0};
for(auto op : actions) {
auto tardiness{compute_distance(op)};
max_tardiness = std::max(max_tardiness, tardiness);
}
maximum_tardiness.push_back(std::max(get_tardiness(), max_tardiness));
}
}
// Prints one line describing the last committed operation: sequence length,
// operation, start + duration, [due date], then tour length / train length /
// current tardiness. Prints "<size> empty!" when nothing is committed.
void DFSRollout::print_step() const {
  if(sequence.empty()) {
    cout << sequence.size() << " empty!\n";
    return;
  }
  const int last{sequence.back()};
  cout << sequence.size() << " " << last << ": ";
  cout << start[last] << "+" << data.duration(last);
  cout << " [" << data.due_date(last) << "] -> ";
  cout << tour_length << " / " << train_length << " / " << get_tardiness() << endl;
}
// Completes the sequence by always committing the first candidate returned
// by get_operations().
void DFSRollout::greedy_first() {
  for(;;) {
    if(!(sequence.size() < data.nb_tasks))
      break;
    get_operations();
    assert(actions.size() > 0);
    compute_distances();
    commit(actions.front());
  }
}
// Completes the sequence by committing, at every step, a candidate drawn
// with random_generator() modulo the number of candidates.
void DFSRollout::random_walk() {
  while(sequence.size() < data.nb_tasks) {
    get_operations();
    // same guard as greedy_first(): the modulo below is undefined when there
    // is no candidate
    assert(actions.size() > 0);
    compute_distances();
    commit(actions[random_generator() % actions.size()]);
  }
}
// Completes the sequence by committing, at every step, the candidate with
// the highest fitness_func() value (the first one found in case of ties).
void DFSRollout::greedy_best() {
  while(sequence.size() < data.nb_tasks) {
    get_operations();
    // same guard as greedy_first(): without a candidate there is nothing to pick
    assert(actions.size() > 0);
    compute_distances();
    auto best{-1};
    auto best_fit{-numeric_limits<double>::max()};
    for(const auto op : actions) {
      const auto f{fitness_func(op)};
      if(f > best_fit) {
        best = op;
        best_fit = f;
      }
    }
    // best stays at -1 if no fitness compared greater than the initial bound
    // (e.g. NaN); committing -1 would index the tables out of range
    assert(best >= 0);
    commit(best);
  }
}
// Completes the sequence by sampling, at every step, a candidate according
// to the distribution computed by get_distribution() at temperature 0.1.
// `actions` is sorted by increasing probability mass, so it is scanned in
// reverse, i.e. from the most probable candidate.
void DFSRollout::greedy_stochastic() {
  while(sequence.size() < data.nb_tasks) {
    get_operations();
    // same guard as greedy_first()
    assert(actions.size() > 0);
    compute_distances();
    get_distribution(0.1);
    auto act{actions.rbegin()};
    auto r{random_generator() % precision};
    // walk the cumulative distribution until r falls inside a candidate's mass
    while(act != actions.rend() and !(r < proba[*act])) {
      r -= proba[*act];
      ++act;
    }
    // should the masses not add up to `precision`, fall back to the most
    // probable candidate rather than dereferencing rend()
    if(act == actions.rend())
      act = actions.rbegin();
    commit(*act);
  }
}
// Appends op to the sequence and updates the running totals (tour length,
// train length) and the per-component bookkeeping. distance[op] must have
// been set beforehand. A period is closed after its fourth operation.
void DFSRollout::commit(const int op) {
#ifdef DEBUG
  verify("before commit", 1);
#endif
  const auto comp{data.component(op)};

  train_length += data.train_use(op);
  tour_length += distance[op];
  tour_length += data.duration(op);
  sequence.push_back(op);

  prev_operation[comp] = op;
  num_operation[comp] += 1;
  if(num_operation[comp] == 4) {
    // all four operations of the period are done: move to the next period
    num_operation[comp] = 0;
    period[comp] += 1;
  }
#ifdef DEBUG
  verify("after commit");
#endif
}
// Reverts the last commit(): removes the last operation from the sequence,
// pops the tardiness stack, and restores tour length, train length and the
// per-component bookkeeping. Requires a non-empty sequence.
void DFSRollout::undo() {
#ifdef DEBUG
  verify("before undo");
#endif
  assert(!sequence.empty());
  const int op{sequence.back()};
  sequence.pop_back();
  maximum_tardiness.pop_back();
  if(sequence.empty()) {
    // op was the only operation: there is no predecessor to measure from
    // (sequence.back() would be undefined); an empty tour has length 0
    tour_length = 0;
  } else {
    // time elapsed between the end of the predecessor and the start of op
    const auto here{sequence.back()};
    const auto dist{start[op] - (start[here] + data.duration(here))};
    tour_length -= dist;
    tour_length -= data.duration(op);
  }
  train_length -= data.train_use(op);
  const auto c{data.component(op)};
  // must be evaluated before the counters below are rolled back:
  // get_previous_operations() reads them with op still counted
  prev_operation[c] = get_previous_operations(op);
  if(--num_operation[c] == -1) {
    // op was the operation that closed a period: reopen that period
    num_operation[c] = 3;
    --period[c];
  }
#ifdef DEBUG
  verify("after undo");
#endif
}
// Depth-first search over sequences of operations, guided by the heuristic
// distribution of get_distribution() at the member `temperature`.
//   ub         - initial upper bound; a branch is cut as soon as its
//                tardiness is >= the best value known so far
//   max_iter   - maximum number of iterations of the outer loop (one dive
//                followed by one backtrack)
//   randomized - when true, the first action tried at each new node is drawn
//                at random according to the distribution
// Progress is reported on cout ("d ..." lines) and the reason for stopping
// on a final "o ..." line. The sequence is left in whatever state the search
// stopped in.
void DFSRollout::search(const int ub, const int max_iter, const bool randomized) {
int best{ub};
int deepest{0};
domain.resize(data.nb_tasks);
action.resize(data.nb_tasks);
int iter{0};
while(true) {
++iter;
// branch: dive until the sequence is complete or the bound cuts the branch
while(sequence.size() < data.nb_tasks) {
get_operations();
compute_distances();
// fail because of the upper bound
if(get_tardiness() >= best) {
// depth reporting only happens while looking for a zero-tardiness solution (best == 1)
if(best == 1 and sequence.size() > deepest)
{
deepest = sequence.size();
cout << "d depth=" << deepest << " iter=" << iter << endl;
}
// drop the entry pushed by compute_distances() for the node that is abandoned
maximum_tardiness.pop_back();
break;
}
// get the probability distribution on the actions, and create the domain from that
get_distribution(temperature);
auto i{sequence.size()};
domain[i].clear();
// actions is sorted by increasing probability: fill the domain most probable first,
// leaving out actions whose probability mass is 0
for(auto a{actions.rbegin()}; a!=actions.rend(); ++a) {
if(proba[*a] > 0)
domain[i].push_back(*a);
}
// if randomized, swap the first action with one chosen at random given the distribution of the heuristic
if(randomized) {
auto act{domain[i].begin()};
auto r{random_generator() % precision};
while(act != domain[i].end()) {
if(r < proba[*act])
break;
else {
r -= proba[*act];
++act;
}
}
// NOTE(review): act is dereferenced without checking it against domain[i].end();
// this relies on the masses in the domain adding up to precision - confirm
std::swap(*act, *domain[i].begin());
}
// branch left
action[i] = domain[i].begin();
commit(*action[i]);
}
// the branch is full: a complete sequence, hence a new best value
if(sequence.size() == data.nb_tasks) {
best = get_tardiness();
cout << "d lmax=" << best << " iter=" << iter << endl;
verify("solution");
}
// backtrack
while(sequence.size()) {
// undo until the lower bound is lower than the upper bound
int i;
do {
undo();
i = sequence.size();
// move on to the next untried action of the node we came back to
++action[i];
} while(sequence.size() and get_tardiness() >= best);
// NOTE(review): when the sequence becomes empty the loop is left without trying
// action[0]; if the search continues, the next dive rebuilds domain[0] and
// action[0] from scratch - confirm this restart at the root is intended
if(sequence.empty())
break;
// if there is an action that we haven't tried yet, try it, otherwise backtrack again
if(action[i] != domain[i].end()) {
// only the chosen action is re-evaluated here, its start/distance may be stale
maximum_tardiness.push_back(std::max(get_tardiness(), compute_distance(*action[i])));
commit(*action[i]);
break;
}
}
// check if the algorithm should stop
if(sequence.empty() and action[0] == domain[0].end()) {
cout << "o search tree exhausted: " << best << " at temperature " << temperature << " \n" ;
} else if(best == 0) {
cout << "o solution found\n";
} else if(iter >= max_iter) {
cout << "o time out\n";
} else
continue;
// reached only when one of the three stopping conditions above printed its message
break;
}
}
#ifndef __DFSROLLOUT_H
#define __DFSROLLOUT_H
#include <random>
#include "instance.h"
using namespace std;
#define THETA_1 0.251156433962621
#define THETA_2 0.576637984553322
#define THETA_3 0.148330024098084
#define THETA_4 0.023875557385973
class DFSRollout {
public:
DFSRollout(Instance& data);
void random_walk();
void greedy_first();
void greedy_best();
void greedy_stochastic();
void search(const int ub, const int max_iter=numeric_limits<int>::max(), const bool rand=true);
void commit(const int op);
void undo();
void clear();
void print_step() const;
int get_tardiness() const {
return maximum_tardiness.back();
}
int get_marginal_tardiness(const int i) const {
return maximum_tardiness[i+1] - maximum_tardiness[i];
}
int back() const {
return sequence.back();
}
// the total tour_length of the
int tour_length{0};
// the total tour_length of the
int train_length{0};
double temperature{0.0005};
private:
Instance& data;
//
vector<int> maximum_tardiness;
// the actual sequence of operations
vector<int> sequence;
// the set of possible operations
vector<int> actions;
// the current period, for every component
vector<int> period;
// the latest operation, for every component
vector<int> prev_operation;
// the number of operations in the period, for every component
vector<int> num_operation;
// for every operation, its start time if scheduled, or a lower bound on it if it is a possible actions choice
vector<int> start;
// used to store the distance of the actions
vector<int> distance;
// used to store the probabilities of the actions
vector<long int> proba;
vector<double> fitnesses;
vector<double> exp_fitness;
long int precision{1000000};
//domains for search
vector<vector<int>> domain;
vector<vector<int>::iterator> action;
std::mt19937 random_generator;
int component_ect(const int op) const;
void get_operations();