#define LOG(kind,...) GNUNET_log_from (kind, "ats-ril",__VA_ARGS__)
-#define RIL_MIN_BW ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__)
-#define RIL_MAX_BW 1024 * 250 //GNUNET_ATS_MaxBandwidth
+#define RIL_MIN_BW (1 * ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__))
+#define RIL_MAX_BW GNUNET_ATS_MaxBandwidth
#define RIL_ACTION_INVALID -1
#define RIL_INTERVAL_EXPONENT 10
-#define RIL_UTILITY_DELAY_MAX 100
+#define RIL_UTILITY_DELAY_MAX 1000
-#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500)
-#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
+#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 200)
+#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 1000)
#define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q
#define RIL_DEFAULT_SELECT RIL_SELECT_EGREEDY
#define RIL_DEFAULT_WELFARE RIL_WELFARE_EGALITARIAN
#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.1
#define RIL_DEFAULT_TRACE_DECAY 0.5
#define RIL_DEFAULT_EXPLORE_RATIO 0.1
-#define RIL_DEFAULT_RBF_DIVISOR 10
+#define RIL_DEFAULT_RBF_DIVISOR 50
#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
#define RIL_DEFAULT_TEMPERATURE 1.0
#define RIL_INC_DEC_STEP_SIZE 1
-#define RIL_NOP_BONUS 0.5
+#define RIL_NOP_DECAY 0.5
/**
* ATS reinforcement learning solver
enum RIL_Action_Type
{
RIL_ACTION_NOTHING = 0,
- RIL_ACTION_BW_IN_DBL = -2, //TODO! put actions back
+ RIL_ACTION_BW_IN_DBL = -2, //TODO? Potentially add more actions
RIL_ACTION_BW_IN_HLV = -3,
RIL_ACTION_BW_IN_INC = 1,
RIL_ACTION_BW_IN_DEC = 2,
* @return wrapped address
*/
static struct RIL_Address_Wrapped *
-agent_address_get (struct RIL_Peer_Agent *agent, struct ATS_Address *address)
+agent_address_get_wrapped (struct RIL_Peer_Agent *agent, struct ATS_Address *address)
{
struct RIL_Address_Wrapped *cur;
agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime)
{
int i;
+ int k;
double delta;
- double *theta = agent->W[agent->a_old];
+ double **theta = agent->W;
delta = agent->envi->global_discount_integrated * reward; //reward
delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value
// agent_q (agent, s_next, a_prime),
// delta);
- for (i = 0; i < agent->m; i++)
+ for (k = 0; k < agent->n; k++)
{
-// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
-// agent->envi->parameters.alpha,
-// delta,
-// i,
-// agent->e[i]);
- theta[i] += agent->envi->parameters.alpha * delta * agent->E[agent->a_old][i];
+ for (i = 0; i < agent->m; i++)
+ {
+ // LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
+ // agent->envi->parameters.alpha,
+ // delta,
+ // i,
+ // agent->e[i]);
+ theta[k][i] += agent->envi->parameters.alpha * delta * agent->E[k][i];
+ }
}
}
agent->E[action][i] += feature[i];
break;
case RIL_E_REPLACE:
- agent->E[action][i] = (agent->envi->global_discount_variable * agent->envi->parameters.lambda * agent->E[action][i]) > feature[i] ? agent->E[action][i] : feature[i];
+ agent->E[action][i] = agent->E[action][i] > feature[i] ? agent->E[action][i] : feature[i];
break;
case RIL_E_DISCOUNT:
for (k = 0; k < agent->n; k++)
delay_atsi = (double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_QUALITY_NET_DELAY);
delay_norm = RIL_UTILITY_DELAY_MAX*exp(-delay_atsi*0.00001);
- pref_match = (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * delay_norm);
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * (double) (agent->bw_in/RIL_MIN_BW));
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * (double) (agent->bw_out/RIL_MIN_BW));
+ pref_match = preferences[GNUNET_ATS_PREFERENCE_LATENCY] * delay_norm;
+ pref_match += preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] *
+ sqrt((double) (agent->bw_in/RIL_MIN_BW) * (double) (agent->bw_out/RIL_MIN_BW));
// return (double) (agent->bw_in/RIL_MIN_BW);
// return sqrt((double) (agent->bw_in/RIL_MIN_BW) * (double) (agent->bw_out/RIL_MIN_BW));
{
if (cur->is_active && cur->address_inuse && (cur->address_inuse->solver_information == scope))
{
- result *= agent_get_utility(cur);
+ result *= pow(agent_get_utility(cur), 1.0 / (double) scope->agent_count);
}
}
- return pow(result, 1.0 / (double) scope->agent_count);
+ return result;
}
GNUNET_assert(GNUNET_NO);
return 1;
}
+static double
+envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent) /* NOTE(review): solver is not read in this function */
+{
+ struct RIL_Scope *net;
+ unsigned long long over_max;
+ unsigned long long over_in = 0; /* inbound excess; stays 0 unless utilized > available */
+ unsigned long long over_out = 0; /* outbound excess; stays 0 unless utilized > available */
+ /* Returns a non-positive penalty: the negated larger in/out over-utilization of the in-use address's scope, in whole RIL_MIN_BW units. */
+ net = agent->address_inuse->solver_information; /* address_inuse is dereferenced without a NULL check here */
+
+ if (net->bw_in_utilized > net->bw_in_available)
+ {
+ over_in = net->bw_in_utilized - net->bw_in_available; /* guarded by the comparison above, so the unsigned subtraction cannot wrap */
+ if (RIL_ACTION_BW_IN_INC == agent->a_old) /* a_old is presumably the agent's previous action -- confirm */
+ {
+ over_in *= 2; /* excess counts double when a_old was the inbound-increase action */
+ }
+ }
+ if (net->bw_out_utilized > net->bw_out_available)
+ {
+ over_out = net->bw_out_utilized - net->bw_out_available; /* guarded by the comparison above, so the unsigned subtraction cannot wrap */
+ if (RIL_ACTION_BW_OUT_INC == agent->a_old)
+ {
+ over_out *= 2; /* excess counts double when a_old was the outbound-increase action */
+ }
+ }
+ over_max = GNUNET_MAX (over_in , over_out) / RIL_MIN_BW; /* integer division: an excess smaller than RIL_MIN_BW truncates to 0 */
+
+ return -1.0 * (double) over_max; /* zero when nothing is over-utilized, negative otherwise */
+}
+
/**
* Gets the reward for the last performed step, which is calculated in equal
* parts from the local (the peer specific) and the global (for all peers
envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
{
struct RIL_Scope *net;
- unsigned long long overutilization;
- unsigned long long over_in = 0;
- unsigned long long over_out = 0;
double objective;
double delta;
double steady;
+ double pen_share;
+ double penalty;
+ double reward;
net = agent->address_inuse->solver_information;
- //TODO make sure in tests to have utilization property updated
- if (net->bw_in_assigned > net->bw_in_available)
- over_in = net->bw_in_assigned - net->bw_in_available;
- if (net->bw_out_assigned > net->bw_out_available)
- over_out = net->bw_out_assigned - net->bw_out_available;
- overutilization = GNUNET_MAX(over_in, over_out) / RIL_MIN_BW;
-
objective = (agent_get_utility (agent) + net->social_welfare) / 2;
delta = objective - agent->objective_old;
agent->objective_old = objective;
if (delta != 0)
{
- agent->nop_bonus = delta * 0.5;
- }
-
- LOG(GNUNET_ERROR_TYPE_DEBUG, "agent->nop_bonus: %f\n", agent->nop_bonus);
-
- steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;
-
- if (0 != overutilization)
- {
- return -1.0 * (double) overutilization;
+ agent->nop_bonus = delta * RIL_NOP_DECAY;
}
else
{
- return delta + steady;
+ agent->nop_bonus *= RIL_NOP_DECAY;
}
+
+ steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;
+
+ pen_share = 0.5;
+ penalty = envi_get_penalty(solver, agent);
+
+ reward = delta + steady;
+ return ((1 - pen_share) * reward) + (pen_share * penalty);
}
/**
struct RIL_Address_Wrapped *cur;
int i = 0;
+ cur = agent_address_get_wrapped(agent, agent->address_inuse);
+
for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
{
if (i == address_index)
net = solver->network_entries[i];
if (net.bw_in_assigned > 0) //only consider scopes where an address is actually active
{
- sum_assigned += net.bw_in_assigned;
- sum_assigned += net.bw_out_assigned;
+ sum_assigned += net.bw_in_utilized;
+ sum_assigned += net.bw_out_utilized;
sum_available += net.bw_in_available;
sum_available += net.bw_out_available;
}
ratio = 0;
}
- return ratio > 1 ? 1 : ratio; //overassignment is possible, cap at 1
+ return ratio > 1 ? 1 : ratio; //overutilization is possible, cap at 1
}
/**
if (ril_network_is_not_full(solver, net->type))
{
if (NULL == agent->address_inuse)
- envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, RIL_MIN_BW, RIL_MIN_BW, silent);
+ envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, agent->bw_in, agent->bw_out, silent);
return;
}
}
struct GNUNET_TIME_Relative time_delta;
double tau;
- // MDP case - TODO remove when debugged
+ // MDP case - TODO! remove when debugged and test SMDP case
if (solver->simulate)
{
solver->global_discount_variable = solver->parameters.gamma;
offset = (double) solver->parameters.step_time_min.rel_value_us;
y = factor * pow (used_ratio, RIL_INTERVAL_EXPONENT) + offset;
- GNUNET_assert(y <= (double ) solver->parameters.step_time_max.rel_value_us);
- GNUNET_assert(y >= (double ) solver->parameters.step_time_min.rel_value_us);
+ GNUNET_assert(y <= (double) solver->parameters.step_time_max.rel_value_us);
+ GNUNET_assert(y >= (double) solver->parameters.step_time_min.rel_value_us);
time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
+// LOG (GNUNET_ERROR_TYPE_INFO, "ratio: %f, factor: %f, offset: %f, y: %f\n",
+// used_ratio,
+// factor,
+// offset,
+// y);
+
if (solver->simulate)
{
time_next = GNUNET_TIME_UNIT_ZERO;
agent->s_old = GNUNET_malloc (sizeof (double) * agent->m);
agent->address_inuse = NULL;
agent->objective_old = 0;
+ agent->nop_bonus = 0;
return agent;
}
}
address_index = agent_address_get_index (agent, address);
- address_wrapped = agent_address_get (agent, address);
+ address_wrapped = agent_address_get_wrapped (agent, address);
if (NULL == address_wrapped)
{
net = address->solver_information;
- GNUNET_assert(!ril_network_is_active (s, net->type));
LOG(GNUNET_ERROR_TYPE_DEBUG,
"Address not considered by agent, address was in inactive network\n");
return;
{
if (NULL != agent->addresses_head) //if peer has an address left, use it
{
- envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, RIL_MIN_BW, RIL_MIN_BW,
- GNUNET_NO);
+ envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, agent->bw_in, agent->bw_out,
+ GNUNET_YES);
}
else
{
"API_get_preferred_address() Activated agent for peer '%s', but no address available\n",
GNUNET_i2s (peer));
}
-
return agent->address_inuse;
}