#define RIL_DEFAULT_EXPLORE_RATIO 0.1
#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
+#define RIL_INC_DEC_STEP_SIZE 1
+
/**
* ATS reinforcement learning solver
*
RIL_ACTION_BW_OUT_HLV = -5,
RIL_ACTION_BW_OUT_INC = -6,
RIL_ACTION_BW_OUT_DEC = -7,
- RIL_ACTION_TYPE_NUM = 2
+ RIL_ACTION_TYPE_NUM = 1
};
enum RIL_Algorithm
* Shutdown
*/
int done;
+
+ /**
+ * Simulate steps, i.e. schedule steps immediately
+ */
+ unsigned long long simulate;
};
/*
if (isinf(result))
{
- GNUNET_assert(GNUNET_NO);
- return isinf(result) * (DBL_MAX / 2); //TODO! fix
+ /* Clamp an infinite estimate to +/-UINT32_MAX. The sign is taken from
+  * result itself: isinf() is only specified to return a nonzero value, and
+  * multiplying its int result by the unsigned UINT32_MAX is carried out in
+  * unsigned arithmetic, which would discard a negative sign. */
+ return (result < 0) ? -(double) UINT32_MAX : (double) UINT32_MAX; //TODO! fix
}
return result;
delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
-// LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f y*Q(s+1,a+1) = %f Q(s,a) = %f\n, y = %f\n",
-// agent->envi->global_discount_integrated * reward,
-// agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime),
-// agent_estimate_q (agent, agent->s_old, agent->a_old),
-// agent->envi->global_discount_variable);
-//
-// LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta);
+ LOG(GNUNET_ERROR_TYPE_INFO, "update() Step# %llu Q(s,a): %f a: %f r: %f y: %f Q(s+1,a+1) = %f delta: %f\n",
+ agent->step_count,
+ agent_estimate_q (agent, agent->s_old, agent->a_old),
+ agent->envi->parameters.alpha,
+ reward,
+ agent->envi->global_discount_variable,
+ agent_estimate_q (agent, s_next, a_prime),
+ delta);
for (i = 0; i < agent->m; i++)
{
return sum;
}
-static void
-envi_state_networks (struct GAS_RIL_Handle *solver)
-{
- int i;
- struct RIL_Network net;
- int overutilized_in;
- int overutilized_out;
-
- for (i = 0; i < solver->networks_count; i++)
- {
- net = solver->network_entries[i];
-
- overutilized_in = net.bw_in_assigned > net.bw_in_available;
- overutilized_out = net.bw_out_assigned > net.bw_out_available;
-
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
- }
-}
+//static void
+//envi_state_networks (struct GAS_RIL_Handle *solver)
+//{
+// int i;
+// struct RIL_Network net;
+// int overutilized_in;
+// int overutilized_out;
+//
+// for (i = 0; i < solver->networks_count; i++)
+// {
+// net = solver->network_entries[i];
+//
+// overutilized_in = net.bw_in_assigned > net.bw_in_available;
+// overutilized_out = net.bw_out_assigned > net.bw_out_available;
+//
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
+// }
+//}
/**
* Allocates a state vector and fills it with the features present
state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
if (net->bw_in_assigned > net->bw_in_available)
{
- state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available;
+ state[1] = 1;// net->bw_in_available;
}
else
{
state[1] = 0;
}
- LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]);
- LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[0] = %f\n", state[0]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[1] = %f\n", state[1]);
- LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
- LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
//get peer features
return state;
}
-/**
- * For all networks a peer has an address in, this gets the maximum bandwidth which could
- * theoretically be available in one of the networks. This is used for bandwidth normalization.
- *
- * @param agent the agent handle
- * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
- */
-static unsigned long long
-ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
-{
- /*
- * get the maximum bandwidth possible for a peer, e.g. among all addresses which addresses'
- * network could provide the maximum bandwidth if all that bandwidth was used on that one peer.
- */
- unsigned long long max = 0;
- struct RIL_Address_Wrapped *cur;
- struct RIL_Network *net;
-
- for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
- {
- net = cur->address_naked->solver_information;
- if (direction_in)
- {
- if (net->bw_in_available > max)
- {
- max = net->bw_in_available;
- }
- }
- else
- {
- if (net->bw_out_available > max)
- {
- max = net->bw_out_available;
- }
- }
- }
- return max;
-}
-
-/**
- * Get the index of the quality-property in question
- *
- * @param type the quality property type
- * @return the index
- */
-static int
-ril_find_property_index (uint32_t type)
-{
- int existing_types[] = GNUNET_ATS_QualityProperties;
- int c;
- for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
- if (existing_types[c] == type)
- return c;
- return GNUNET_SYSERR;
-}
-
-static int
-ril_get_atsi (struct ATS_Address *address, uint32_t type)
-{
- int c1;
- GNUNET_assert(NULL != address);
-
- if ((NULL == address->atsi) || (0 == address->atsi_count))
- return 0;
-
- for (c1 = 0; c1 < address->atsi_count; c1++)
- {
- if (ntohl (address->atsi[c1].type) == type)
- return ntohl (address->atsi[c1].value);
- }
- return 0;
-}
-
-static double
-envi_reward_global (struct GAS_RIL_Handle *solver)
-{
- int i;
- struct RIL_Network net;
- unsigned int sum_in_available = 0;
- unsigned int sum_out_available = 0;
- unsigned int sum_in_assigned = 0;
- unsigned int sum_out_assigned = 0;
- double ratio_in;
- double ratio_out;
-
- for (i = 0; i < solver->networks_count; i++)
- {
- net = solver->network_entries[i];
- sum_in_available += net.bw_in_available;
- sum_in_assigned += net.bw_in_assigned;
- sum_out_available += net.bw_out_available;
- sum_out_assigned += net.bw_out_assigned;
- }
-
- ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
- ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
-
- // global reward in [1,2]
- return ratio_in +1;
- return ((ratio_in + ratio_out) / 2) + 1;
-}
-
-static double
-envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
-{
- const double *preferences;
- const double *properties;
- int prop_index;
- double pref_match = 0;
- double bw_norm;
- double dl_norm;
-
- preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
- &agent->peer);
- properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
- agent->address_inuse);
-
- // delay in [0,1]
- prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
- dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
-
- // utilization in [0,1]
- bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
- / (double) ril_get_max_bw (agent, GNUNET_YES))
- + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
- / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
-
- // preference matching in [0,4]
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
-
- // local reward in [1,2]
- return (pref_match / 4) +1;
-}
+///**
+// * For all networks a peer has an address in, this gets the maximum bandwidth which could
+// * theoretically be available in one of the networks. This is used for bandwidth normalization.
+// *
+// * @param agent the agent handle
+// * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
+// */
+//static unsigned long long
+//ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
+//{
+// /*
+// * get the maximum bandwidth possible for a peer, i.e. the largest available bandwidth among the
+// * networks of the peer's addresses, as if all of that network's bandwidth was used on that one peer.
+// */
+// unsigned long long max = 0;
+// struct RIL_Address_Wrapped *cur;
+// struct RIL_Network *net;
+//
+// for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
+// {
+// net = cur->address_naked->solver_information;
+// if (direction_in)
+// {
+// if (net->bw_in_available > max)
+// {
+// max = net->bw_in_available;
+// }
+// }
+// else
+// {
+// if (net->bw_out_available > max)
+// {
+// max = net->bw_out_available;
+// }
+// }
+// }
+// return max;
+//}
+
+///**
+// * Get the index of the quality-property in question
+// *
+// * @param type the quality property type
+// * @return the index
+// */
+//static int
+//ril_find_property_index (uint32_t type)
+//{
+// int existing_types[] = GNUNET_ATS_QualityProperties;
+// int c;
+// for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
+// if (existing_types[c] == type)
+// return c;
+// return GNUNET_SYSERR;
+//}
+
+//static int
+//ril_get_atsi (struct ATS_Address *address, uint32_t type)
+//{
+// int c1;
+// GNUNET_assert(NULL != address);
+//
+// if ((NULL == address->atsi) || (0 == address->atsi_count))
+// return 0;
+//
+// for (c1 = 0; c1 < address->atsi_count; c1++)
+// {
+// if (ntohl (address->atsi[c1].type) == type)
+// return ntohl (address->atsi[c1].value);
+// }
+// return 0;
+//}
+
+//static double
+//envi_reward_global (struct GAS_RIL_Handle *solver)
+//{
+// int i;
+// struct RIL_Network net;
+// unsigned int sum_in_available = 0;
+// unsigned int sum_out_available = 0;
+// unsigned int sum_in_assigned = 0;
+// unsigned int sum_out_assigned = 0;
+// double ratio_in;
+// double ratio_out;
+//
+// for (i = 0; i < solver->networks_count; i++)
+// {
+// net = solver->network_entries[i];
+// sum_in_available += net.bw_in_available;
+// sum_in_assigned += net.bw_in_assigned;
+// sum_out_available += net.bw_out_available;
+// sum_out_assigned += net.bw_out_assigned;
+// }
+//
+// ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
+// ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
+//
+// // global reward in [1,2]
+// return ratio_in +1;
+// return ((ratio_in + ratio_out) / 2) + 1;
+//}
+
+//static double
+//envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
+//{
+// const double *preferences;
+// const double *properties;
+// int prop_index;
+// double pref_match = 0;
+// double bw_norm;
+// double dl_norm;
+//
+// preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
+// &agent->peer);
+// properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
+// agent->address_inuse);
+//
+// // delay in [0,1]
+// prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
+// dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
+//
+// // utilization in [0,1]
+// bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
+// / (double) ril_get_max_bw (agent, GNUNET_YES))
+// + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
+// / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
+//
+// // preference matching in [0,4]
+// pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
+// pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
+//
+// // local reward in [1,2]
+// return (pref_match / 4) +1;
+//}
/**
* Gets the reward for the last performed step, which is calculated in equal
if (direction_in)
{
- new_bw = agent->bw_in + (1 * MIN_BW);
+ new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
}
else
{
- new_bw = agent->bw_out + (1 * MIN_BW);
+ new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
if (direction_in)
{
- new_bw = agent->bw_in - (1 * MIN_BW);
+ new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
if (new_bw < MIN_BW || new_bw > agent->bw_in)
new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
}
else
{
- new_bw = agent->bw_out - (1 * MIN_BW);
+ new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
if (new_bw < MIN_BW || new_bw > agent->bw_out)
new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
agent_step (struct RIL_Peer_Agent *agent)
{
int a_next = RIL_ACTION_INVALID;
+ int explore;
double *s_next;
double reward;
s_next = envi_get_state (agent->envi, agent);
reward = envi_get_reward (agent->envi, agent);
-
- GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: A: %d R: %f IN %llu OUT %llu\n",
- agent->step_count,
- agent->a_old,
- reward,
- agent->bw_in/1024,
- agent->bw_out/1024);
- GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Best A: %d Q(s,a): %f \n",
- agent->step_count,
- agent_get_action_best (agent, s_next),
- agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
+ explore = agent_decide_exploration (agent);
switch (agent->envi->parameters.algorithm)
{
case RIL_ALGO_SARSA:
- if (agent_decide_exploration (agent))
+ if (explore)
{
a_next = agent_get_action_explore (agent, s_next);
}
//updates weights with best action, disregarding actually selected action (off-policy), if not first step
agent_update_weights (agent, reward, s_next, a_next);
}
- if (agent_decide_exploration (agent))
+ if (explore)
{
a_next = agent_get_action_explore (agent, s_next);
agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n",
+ agent->step_count,
+ reward,
+ agent->bw_in/1024,
+ agent->bw_out/1024,
+ a_next);
+
envi_do_action (agent->envi, agent, a_next);
GNUNET_free(agent->s_old);
double tau;
// MDP case - remove when debugged
- if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us)
+ if (solver->simulate)
{
solver->global_discount_variable = solver->parameters.gamma;
solver->global_discount_integrated = 1;
time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
+ if (solver->simulate)
+ {
+ time_next = GNUNET_TIME_UNIT_ZERO;
+ }
+
if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
{
solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
for (k = 0; k < agent->m; k++)
{
if (0 == count) {
- agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
+ agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
}
else {
for (other = agent->envi->agents_head; NULL != other; other = other->next)
{
solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
}
+ if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate))
+ {
+ solver->simulate = GNUNET_NO;
+ }
env->sf.s_add = &GAS_ril_address_add;
env->sf.s_address_update_property = &GAS_ril_address_property_changed;
cur->type = env->networks[c];
cur->bw_in_available = env->in_quota[c];
cur->bw_out_available = env->out_quota[c];
- LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
}
- LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");
- LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Parameters:\n");
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
solver->parameters.algorithm ? "Q" : "SARSA",
solver->parameters.alpha,
solver->parameters.beta,
solver->parameters.lambda);
- LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n",
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() explore = %f, global_share = %f\n",
solver->parameters.explore_ratio,
solver->parameters.reward_global_share);
m_new = agent->m - RIL_FEATURES_ADDRESS_COUNT;
n_new = agent->n - 1;
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "cutting address features from weight vectors\n");
+
for (i = 0; i < agent->n; i++)
{
ril_cut_from_vector ((void **) &agent->W[i], sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace, when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
}
GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "cutting address action row from weight matrix\n");
ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
1, agent->n);
//correct last action
agent->a_old = RIL_ACTION_INVALID;
}
//decrease old state vector and eligibility vector
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "cutting address features from old state and eligibility vectors\n");
ril_cut_from_vector ((void **) &agent->s_old, sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
ril_cut_from_vector ((void **) &agent->e, sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
agent->m = m_new;
agent->n = n_new;