#define LOG(kind,...) GNUNET_log_from (kind, "ats-ril",__VA_ARGS__)
+#define MIN_BW ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__)
+
#define RIL_ACTION_INVALID -1
-#define RIL_FEATURES_ADDRESS_COUNT (3 + GNUNET_ATS_QualityPropertiesCount)
-#define RIL_FEATURES_NETWORK_COUNT 6
+#define RIL_FEATURES_ADDRESS_COUNT (0)// + GNUNET_ATS_QualityPropertiesCount)
+#define RIL_FEATURES_NETWORK_COUNT 2
#define RIL_INTERVAL_EXPONENT 10
#define RIL_DEFAULT_STEP_TIME_MIN GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 500)
#define RIL_DEFAULT_STEP_TIME_MAX GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
#define RIL_DEFAULT_ALGORITHM RIL_ALGO_SARSA
#define RIL_DEFAULT_DISCOUNT_BETA 1
+#define RIL_DEFAULT_DISCOUNT_GAMMA 0.5
#define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.1
#define RIL_DEFAULT_TRACE_DECAY 0.5
#define RIL_DEFAULT_EXPLORE_RATIO 0.1
#define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
+#define RIL_INC_DEC_STEP_SIZE 1
+
/**
* ATS reinforcement learning solver
*
*/
enum RIL_Action_Type
{
- RIL_ACTION_NOTHING = 0,
- RIL_ACTION_BW_IN_DBL = -1, //TODO! put actions back
- RIL_ACTION_BW_IN_HLV = -2,
- RIL_ACTION_BW_IN_INC = 1,
- RIL_ACTION_BW_IN_DEC = 2,
- RIL_ACTION_BW_OUT_DBL = -3,
- RIL_ACTION_BW_OUT_HLV = -4,
- RIL_ACTION_BW_OUT_INC = 3,
- RIL_ACTION_BW_OUT_DEC = 4,
- RIL_ACTION_TYPE_NUM = 5
+ /*
+  * RIL_ACTION_TYPE_NUM is copied into agent->n and sizes the per-agent
+  * weight matrix W, so it has to cover every non-negative action value:
+  * RIL_ACTION_BW_IN_INC (0) and RIL_ACTION_BW_IN_DEC (1).
+  * The negative values are parked until the TODO below is resolved.
+  * NOTE(review): RIL_ACTION_NOTHING has the same value (-1) as the
+  * RIL_ACTION_INVALID macro -- confirm this is harmless while it is parked.
+  */
+ RIL_ACTION_NOTHING = -1,
+ RIL_ACTION_BW_IN_DBL = -2, //TODO! put actions back
+ RIL_ACTION_BW_IN_HLV = -3,
+ RIL_ACTION_BW_IN_INC = 0,
+ RIL_ACTION_BW_IN_DEC = 1,
+ RIL_ACTION_BW_OUT_DBL = -4,
+ RIL_ACTION_BW_OUT_HLV = -5,
+ RIL_ACTION_BW_OUT_INC = -6,
+ RIL_ACTION_BW_OUT_DEC = -7,
+ RIL_ACTION_TYPE_NUM = 2
};
enum RIL_Algorithm
enum RIL_Algorithm algorithm;
/**
- * Learning discount factor in the TD-update
+ * Gradient-descent step-size
+ */
+ double alpha;
+
+ /**
+ * Learning discount variable in the TD-update for semi-MDPs
*/
double beta;
/**
- * Gradient-descent step-size
+ * Learning discount factor in the TD-update for MDPs
*/
- double alpha;
+ double gamma;
/**
* Trace-decay factor for eligibility traces
double lambda;
/**
- * Ratio, whith what probability an agent should explore in the e-greed policy
+ * Probability with which an agent explores in the epsilon-greedy policy
*/
double explore_ratio;
int suggestion_issue;
/**
- * The address which has to be issued ()
+ * The address which has to be issued
*/
struct ATS_Address * suggestion_address;
};
*/
unsigned long long bw_in_available;
+ /**
+ * Bandwidth inbound assigned in network after last step
+ */
+ unsigned long long bw_in_assigned;
+
/**
* Total available outbound bandwidth
*/
unsigned long long bw_out_available;
+
+ /**
+ * Bandwidth outbound assigned in network after last step
+ */
+ unsigned long long bw_out_assigned;
};
/**
*/
GNUNET_SCHEDULER_TaskIdentifier step_next_task_id;
- /**
- * Whether a step is already scheduled
- */
- int step_task_pending;
-
/**
* Variable discount factor, dependent on time between steps
*/
*/
struct RIL_Peer_Agent * agents_head;
struct RIL_Peer_Agent * agents_tail;
+
+ /**
+ * Shutdown
+ */
+ int done;
+
+ /**
+ * Simulate steps, i.e. schedule steps immediately
+ */
+ unsigned long long simulate;
};
/*
if (isinf(result))
{
- return isinf(result) * (DBL_MAX / 2); //is still big enough
+ return isinf(result) * UINT32_MAX; //TODO! fix
}
return result;
delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
+ LOG(GNUNET_ERROR_TYPE_INFO, "update() Step# %llu Q(s,a): %f a: %f r: %f y: %f Q(s+1,a+1) = %f delta: %f\n",
+ agent->step_count,
+ agent_estimate_q (agent, agent->s_old, agent->a_old),
+ agent->envi->parameters.alpha,
+ reward,
+ agent->envi->global_discount_variable,
+ agent_estimate_q (agent, s_next, a_prime),
+ delta);
+
for (i = 0; i < agent->m; i++)
{
- theta[i] += agent->envi->parameters.alpha * delta * agent->e[i];
+// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
+// agent->envi->parameters.alpha,
+// delta,
+// i,
+// agent->e[i]);
+ theta[i] += agent->envi->parameters.alpha * delta * agent->s_old[i];// * agent->e[i];
}
}
* @param mod the kind of modification
*/
static void
-agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification mod)
+agent_modify_eligibility (struct RIL_Peer_Agent *agent, enum RIL_E_Modification mod, double *f)
{
int i;
double *e = agent->e;
switch (mod)
{
case RIL_E_ACCUMULATE:
- e[i] += 1;
+ e[i] += f[i];
break;
case RIL_E_REPLACE:
- e[i] = 1;
+ e[i] = f[i];
break;
case RIL_E_SET:
e[i] *= agent->envi->global_discount_variable * agent->envi->parameters.lambda;
return sum;
}
-static void
-envi_state_networks (struct GAS_RIL_Handle *solver)
-{
- int i;
- struct RIL_Network net;
- unsigned long long assigned_in;
- unsigned long long assigned_out;
- int overutilized_in;
- int overutilized_out;
-
- for (i = 0; i < solver->networks_count; i++)
- {
- net = solver->network_entries[i];
-
- assigned_in = ril_network_get_assigned(solver, net.type, GNUNET_YES);
- assigned_out = ril_network_get_assigned(solver, net.type, GNUNET_NO);
- overutilized_in = assigned_in > net.bw_in_available;
- overutilized_out = assigned_out > net.bw_out_available;
-
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = (double) assigned_in;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) net.bw_in_available;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = (double) overutilized_in;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) assigned_out;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 4] = (double) net.bw_out_available;
- solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 5] = (double) overutilized_out;
- }
-}
+//static void
+//envi_state_networks (struct GAS_RIL_Handle *solver)
+//{
+// int i;
+// struct RIL_Network net;
+// int overutilized_in;
+// int overutilized_out;
+//
+// for (i = 0; i < solver->networks_count; i++)
+// {
+// net = solver->network_entries[i];
+//
+// overutilized_in = net.bw_in_assigned > net.bw_in_available;
+// overutilized_out = net.bw_out_assigned > net.bw_out_available;
+//
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
+// solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
+// }
+//}
/**
* Allocates a state vector and fills it with the features present
envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
{
int i;
- int k;
+// int k;
double *state = GNUNET_malloc (sizeof (double) * agent->m);
struct RIL_Address_Wrapped *cur_address;
- const double *preferences;
- const double *properties;
+// const double *preferences;
+// const double *properties;
+ struct RIL_Network *net;
//copy global networks state
for (i = 0; i < solver->networks_count * RIL_FEATURES_NETWORK_COUNT; i++)
{
- state[i] = solver->global_state_networks[i];
+// state[i] = solver->global_state_networks[i];
}
+ net = agent->address_inuse->solver_information;
- //get peer features
- preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
- &agent->peer);
- for (k = 0; k < GNUNET_ATS_PreferenceCount; k++)
+ state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
+ if (net->bw_in_assigned > net->bw_in_available)
{
- state[i++] = preferences[k];
+ state[1] = 1;// net->bw_in_available;
}
-
- //get address specific features
- for (cur_address = agent->addresses_head; NULL != cur_address; cur_address = cur_address->next)
- {
- //when changing the number of address specific state features, change RIL_FEATURES_ADDRESS_COUNT macro
- state[i++] = cur_address->address_naked->active;
- state[i++] = cur_address->address_naked->active ? agent->bw_in : 0;
- state[i++] = cur_address->address_naked->active ? agent->bw_out : 0;
- properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
- cur_address->address_naked);
- for (k = 0; k < GNUNET_ATS_QualityPropertiesCount; k++)
- {
- state[i++] = properties[k];
- }
- }
-
- return state;
-}
-
-/**
- * For all networks a peer has an address in, this gets the maximum bandwidth which could
- * theoretically be available in one of the networks. This is used for bandwidth normalization.
- *
- * @param agent the agent handle
- * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
- */
-static unsigned long long
-ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
-{
- /*
- * get the maximum bandwidth possible for a peer, e.g. among all addresses which addresses'
- * network could provide the maximum bandwidth if all that bandwidth was used on that one peer.
- */
- unsigned long long max = 0;
- struct RIL_Address_Wrapped *cur;
- struct RIL_Network *net;
-
- for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
+ else
{
- net = cur->address_naked->solver_information;
- if (direction_in)
- {
- if (net->bw_in_available > max)
- {
- max = net->bw_in_available;
- }
- }
- else
- {
- if (net->bw_out_available > max)
- {
- max = net->bw_out_available;
- }
- }
+ state[1] = 0;
}
- return max;
-}
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[0] = %f\n", state[0]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() state[1] = %f\n", state[1]);
-/**
- * Get the index of the quality-property in question
- *
- * @param type the quality property type
- * @return the index
- */
-static int
-ril_find_property_index (uint32_t type)
-{
- int existing_types[] = GNUNET_ATS_QualityProperties;
- int c;
- for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
- if (existing_types[c] == type)
- return c;
- return GNUNET_SYSERR;
-}
-
-static int
-ril_get_atsi (struct ATS_Address *address, uint32_t type)
-{
- int c1;
- GNUNET_assert(NULL != address);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
+ LOG(GNUNET_ERROR_TYPE_INFO, "get_state() W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
- if ((NULL == address->atsi) || (0 == address->atsi_count))
- return 0;
- for (c1 = 0; c1 < address->atsi_count; c1++)
- {
- if (ntohl (address->atsi[c1].type) == type)
- return ntohl (address->atsi[c1].value);
- }
- return 0;
-}
-
-static double
-envi_reward_global (struct GAS_RIL_Handle *solver)
-{
- int i;
- struct RIL_Network net;
- unsigned int sum_in_available = 0;
- unsigned int sum_out_available = 0;
- unsigned int sum_in_assigned = 0;
- unsigned int sum_out_assigned = 0;
- double ratio_in;
- double ratio_out;
+ //get peer features
+// preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
+// &agent->peer);
+// for (k = 0; k < GNUNET_ATS_PreferenceCount; k++)
+// {
+// state[i++] = preferences[k];
+// }
- for (i = 0; i < solver->networks_count; i++)
+ //get address specific features
+ for (cur_address = agent->addresses_head; NULL != cur_address; cur_address = cur_address->next)
{
- net = solver->network_entries[i];
- sum_in_available += net.bw_in_available;
- sum_in_assigned += ril_network_get_assigned(solver, net.type, GNUNET_YES);
- sum_out_available += net.bw_out_available;
- sum_out_assigned += ril_network_get_assigned(solver, net.type, GNUNET_NO);
+// //when changing the number of address specific state features, change RIL_FEATURES_ADDRESS_COUNT macro
+// state[i++] = cur_address->address_naked->active;
+// state[i++] = cur_address->address_naked->active ? agent->bw_in : 0;
+// state[i++] = cur_address->address_naked->active ? agent->bw_out : 0;
+// properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
+// cur_address->address_naked);
+// for (k = 0; k < GNUNET_ATS_QualityPropertiesCount; k++)
+// {
+// state[i++] = properties[k];
+// }
}
- ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
- ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
-
- // global reward in [1,2]
- return ((ratio_in + ratio_out) / 2) + 1;
+ return state;
}
-static double
-envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
-{
- const double *preferences;
- const double *properties;
- int prop_index;
- double pref_match = 0;
- double bw_norm;
- double dl_norm;
-
- preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
- &agent->peer);
- properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
- agent->address_inuse);
-
- // delay in [0,1]
- prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
- dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
-
- // utilization in [0,1]
- bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
- / (double) ril_get_max_bw (agent, GNUNET_YES))
- + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
- / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
-
- // preference matching in [0,4]
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
- pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
-
- // local reward in [1,2]
- return (pref_match / 4) +1;
-}
+///**
+// * For all networks a peer has an address in, this gets the maximum bandwidth which could
+// * theoretically be available in one of the networks. This is used for bandwidth normalization.
+// *
+// * @param agent the agent handle
+// * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
+// */
+//static unsigned long long
+//ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
+//{
+// /*
+// * get the maximum bandwidth possible for a peer, e.g. among all addresses which addresses'
+// * network could provide the maximum bandwidth if all that bandwidth was used on that one peer.
+// */
+// unsigned long long max = 0;
+// struct RIL_Address_Wrapped *cur;
+// struct RIL_Network *net;
+//
+// for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
+// {
+// net = cur->address_naked->solver_information;
+// if (direction_in)
+// {
+// if (net->bw_in_available > max)
+// {
+// max = net->bw_in_available;
+// }
+// }
+// else
+// {
+// if (net->bw_out_available > max)
+// {
+// max = net->bw_out_available;
+// }
+// }
+// }
+// return max;
+//}
+
+///**
+// * Get the index of the quality-property in question
+// *
+// * @param type the quality property type
+// * @return the index
+// */
+//static int
+//ril_find_property_index (uint32_t type)
+//{
+// int existing_types[] = GNUNET_ATS_QualityProperties;
+// int c;
+// for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
+// if (existing_types[c] == type)
+// return c;
+// return GNUNET_SYSERR;
+//}
+
+//static int
+//ril_get_atsi (struct ATS_Address *address, uint32_t type)
+//{
+// int c1;
+// GNUNET_assert(NULL != address);
+//
+// if ((NULL == address->atsi) || (0 == address->atsi_count))
+// return 0;
+//
+// for (c1 = 0; c1 < address->atsi_count; c1++)
+// {
+// if (ntohl (address->atsi[c1].type) == type)
+// return ntohl (address->atsi[c1].value);
+// }
+// return 0;
+//}
+
+//static double
+//envi_reward_global (struct GAS_RIL_Handle *solver)
+//{
+// int i;
+// struct RIL_Network net;
+// unsigned int sum_in_available = 0;
+// unsigned int sum_out_available = 0;
+// unsigned int sum_in_assigned = 0;
+// unsigned int sum_out_assigned = 0;
+// double ratio_in;
+// double ratio_out;
+//
+// for (i = 0; i < solver->networks_count; i++)
+// {
+// net = solver->network_entries[i];
+// sum_in_available += net.bw_in_available;
+// sum_in_assigned += net.bw_in_assigned;
+// sum_out_available += net.bw_out_available;
+// sum_out_assigned += net.bw_out_assigned;
+// }
+//
+// ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
+// ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
+//
+// // global reward in [1,2]
+// return ratio_in +1;
+// return ((ratio_in + ratio_out) / 2) + 1;
+//}
+
+//static double
+//envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
+//{
+// const double *preferences;
+// const double *properties;
+// int prop_index;
+// double pref_match = 0;
+// double bw_norm;
+// double dl_norm;
+//
+// preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
+// &agent->peer);
+// properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
+// agent->address_inuse);
+//
+// // delay in [0,1]
+// prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
+// dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
+//
+// // utilization in [0,1]
+// bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
+// / (double) ril_get_max_bw (agent, GNUNET_YES))
+// + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
+// / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
+//
+// // preference matching in [0,4]
+// pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
+// pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
+//
+// // local reward in [1,2]
+// return (pref_match / 4) +1;
+//}
/**
* Gets the reward for the last performed step, which is calculated in equal
envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
{
struct RIL_Network *net;
- double reward = 0;
- unsigned long long assigned_in;
- unsigned long long assigned_out;
- unsigned long long overutilized = 0;
+// double reward = 0;
+ long long overutilized_in = 0;
+// long long overutilized_out;
+ long long assigned_in = 0;
+// long long assigned_out = 0;
+// long long unused;
//punish overutilization
net = agent->address_inuse->solver_information;
- assigned_in = ril_network_get_assigned(solver, net->type, GNUNET_YES);
- assigned_out = ril_network_get_assigned(solver, net->type, GNUNET_NO);
- if (assigned_in > net->bw_in_available)
+ if (net->bw_in_assigned > net->bw_in_available)
{
- overutilized += assigned_in - net->bw_in_available;
+ overutilized_in = (net->bw_in_assigned - net->bw_in_available);
+ assigned_in = net->bw_in_available;
}
- if (assigned_out > net->bw_out_available)
- {
- overutilized += assigned_out - net->bw_out_available;
- }
- if (overutilized > 0)
+ else
{
- //return -1. * (double) overutilized;
- return -1;
+ assigned_in = net->bw_in_assigned;
}
+// if (net->bw_out_assigned > net->bw_out_available)
+// {
+// overutilized_out = (net->bw_out_assigned - net->bw_out_available);
+// assigned_out = net->bw_out_available;
+// }
+// else
+// {
+// assigned_out = net->bw_out_assigned;
+// }
- reward += envi_reward_global (solver) * (solver->parameters.reward_global_share);
- reward += envi_reward_local (solver, agent) * (1 - solver->parameters.reward_global_share);
+// unused = net->bw_in_available - net->bw_in_assigned;
+// unused = unused < 0 ? unused : -unused;
- return reward;
+ return (double) (assigned_in - overutilized_in) / 1024;
+
+// reward += envi_reward_global (solver) * (solver->parameters.reward_global_share);
+// reward += envi_reward_local (solver, agent) * (1 - solver->parameters.reward_global_share);
+//
+// return (reward - 1.) * 100;
}
/**
if (direction_in)
{
new_bw = agent->bw_in * 2;
- if (new_bw < agent->bw_in)
- new_bw = ULLONG_MAX;
+ if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
+ new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
agent->bw_out, GNUNET_NO);
}
else
{
new_bw = agent->bw_out * 2;
- if (new_bw < agent->bw_out)
- new_bw = ULLONG_MAX;
+ if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
+ new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
new_bw, GNUNET_NO);
}
struct RIL_Peer_Agent *agent,
int direction_in)
{
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
unsigned long long new_bw;
if (direction_in)
{
new_bw = agent->bw_in / 2;
- if (new_bw < min_bw || new_bw > agent->bw_in)
- new_bw = min_bw;
+ if (new_bw < MIN_BW || new_bw > agent->bw_in)
+ new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
GNUNET_NO);
}
else
{
new_bw = agent->bw_out / 2;
- if (new_bw < min_bw || new_bw > agent->bw_out)
- new_bw = min_bw;
+ if (new_bw < MIN_BW || new_bw > agent->bw_out)
+ new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
GNUNET_NO);
}
envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in)
{
unsigned long long new_bw;
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
if (direction_in)
{
- new_bw = agent->bw_in + (5 * min_bw);
- if (new_bw < agent->bw_in)
- new_bw = ULLONG_MAX;
+ new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
+ if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
+ new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
agent->bw_out, GNUNET_NO);
}
else
{
- new_bw = agent->bw_out + (5 * min_bw);
- if (new_bw < agent->bw_out)
- new_bw = ULLONG_MAX;
+ new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
+ if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
+ new_bw = GNUNET_ATS_MaxBandwidth;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
new_bw, GNUNET_NO);
}
static void
envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent, int direction_in)
{
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
unsigned long long new_bw;
if (direction_in)
{
- new_bw = agent->bw_in - (5 * min_bw);
- if (new_bw < min_bw || new_bw > agent->bw_in)
- new_bw = min_bw;
+ new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
+ if (new_bw < MIN_BW || new_bw > agent->bw_in)
+ new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
GNUNET_NO);
}
else
{
- new_bw = agent->bw_out - (5 * min_bw);
- if (new_bw < min_bw || new_bw > agent->bw_out)
- new_bw = min_bw;
+ new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
+ if (new_bw < MIN_BW || new_bw > agent->bw_out)
+ new_bw = MIN_BW;
envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
GNUNET_NO);
}
agent_step (struct RIL_Peer_Agent *agent)
{
int a_next = RIL_ACTION_INVALID;
+ int explore;
double *s_next;
double reward;
s_next = envi_get_state (agent->envi, agent);
reward = envi_get_reward (agent->envi, agent);
+ explore = agent_decide_exploration (agent);
switch (agent->envi->parameters.algorithm)
{
case RIL_ALGO_SARSA:
- agent_modify_eligibility (agent, RIL_E_SET);
- if (agent_decide_exploration (agent))
+ if (explore)
{
a_next = agent_get_action_explore (agent, s_next);
}
{
//updates weights with selected action (on-policy), if not first step
agent_update_weights (agent, reward, s_next, a_next);
+ agent_modify_eligibility (agent, RIL_E_SET, s_next);
}
break;
//updates weights with best action, disregarding actually selected action (off-policy), if not first step
agent_update_weights (agent, reward, s_next, a_next);
}
- if (agent_decide_exploration (agent))
+ if (explore)
{
a_next = agent_get_action_explore (agent, s_next);
- agent_modify_eligibility (agent, RIL_E_ZERO);
+ agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
}
else
{
a_next = agent_get_action_best (agent, s_next);
- agent_modify_eligibility (agent, RIL_E_SET);
+ agent_modify_eligibility (agent, RIL_E_SET, s_next);
}
break;
}
GNUNET_assert(RIL_ACTION_INVALID != a_next);
- agent_modify_eligibility (agent, RIL_E_ACCUMULATE);
+ agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
- envi_do_action (agent->envi, agent, a_next);
+ GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step() Step# %llu R: %f IN %llu OUT %llu A: %d\n",
+ agent->step_count,
+ reward,
+ agent->bw_in/1024,
+ agent->bw_out/1024,
+ a_next);
- GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu: Action: %d Reward: %f Result: IN %llu OUT %llu\n",
- agent->step_count,
- a_next,
- reward,
- agent->bw_in/1024,
- agent->bw_out/1024);
+ envi_do_action (agent->envi, agent, a_next);
GNUNET_free(agent->s_old);
agent->s_old = s_next;
agent->step_count += 1;
}
-static int
+static void
ril_step (struct GAS_RIL_Handle *solver);
/**
{
struct GAS_RIL_Handle *solver = cls;
- solver->step_task_pending = GNUNET_NO;
+ solver->step_next_task_id = GNUNET_SCHEDULER_NO_TASK;
ril_step (solver);
}
for (i = 0; i < solver->networks_count; i++)
{
net = solver->network_entries[i];
- if (ril_network_get_assigned(solver, net.type, GNUNET_YES) > 0) //only consider scopes where an address is actually active
+ if (net.bw_in_assigned > 0) //only consider scopes where an address is actually active
{
- sum_assigned += ril_network_get_assigned(solver, net.type, GNUNET_YES);
- sum_assigned += ril_network_get_assigned(solver, net.type, GNUNET_NO);
+ sum_assigned += net.bw_in_assigned;
+ sum_assigned += net.bw_out_assigned;
sum_available += net.bw_in_available;
sum_available += net.bw_out_available;
}
ril_network_is_not_full (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network)
{
struct RIL_Network *net;
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
struct RIL_Peer_Agent *agent;
unsigned long long address_count = 0;
}
net = ril_get_network (solver, network);
- return (net->bw_in_available > min_bw * address_count) && (net->bw_out_available > min_bw * address_count);
+ return (net->bw_in_available > MIN_BW * address_count) && (net->bw_out_available > MIN_BW * address_count);
}
static void
{
struct RIL_Address_Wrapped *addr_wrap;
struct RIL_Network *net;
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
for (addr_wrap = agent->addresses_head; NULL != addr_wrap; addr_wrap = addr_wrap->next)
{
if (ril_network_is_not_full(solver, net->type))
{
if (NULL == agent->address_inuse)
- envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, min_bw, min_bw, silent);
+ envi_set_active_suggestion (solver, agent, addr_wrap->address_naked, MIN_BW, MIN_BW, silent);
return;
}
}
agent->address_inuse = NULL;
}
+/**
+ * Updates the solver-wide discount values used for the current step.
+ *
+ * If solver->simulate is set (MDP case), the fixed discount factor gamma is
+ * used and the integrated discount is 1. Otherwise (semi-MDP case) the
+ * discount depends on tau, the time elapsed since solver->step_time_last
+ * expressed in units of the minimum step time:
+ *   global_discount_variable   = e^(-beta * tau)
+ *   global_discount_integrated = (1 - e^(-beta * tau)) / beta
+ * In the semi-MDP case solver->step_time_last is advanced to the current time.
+ *
+ * @param solver the solver handle
+ */
+static void
+ril_calculate_discount (struct GAS_RIL_Handle *solver)
+{
+  struct GNUNET_TIME_Absolute time_now;
+  struct GNUNET_TIME_Relative time_delta;
+  double beta;
+  double tau;
+
+  // MDP case - remove when debugged
+  if (solver->simulate)
+  {
+    solver->global_discount_variable = solver->parameters.gamma;
+    solver->global_discount_integrated = 1;
+    return;
+  }
+
+  // semi-MDP case
+
+  //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step
+  time_now = GNUNET_TIME_absolute_get ();
+  time_delta = GNUNET_TIME_absolute_get_difference (solver->step_time_last, time_now);
+  solver->step_time_last = time_now;
+  tau = (double) time_delta.rel_value_us
+      / (double) solver->parameters.step_time_min.rel_value_us;
+
+  //calculate reward discounts (once per step for all agents)
+  beta = (double) solver->parameters.beta;
+  solver->global_discount_variable = exp ((-1.) * beta * tau);
+  if (0. == beta)
+  {
+    //limit of (1 - e^(-beta * tau)) / beta for beta -> 0; avoids 0/0 = NaN
+    solver->global_discount_integrated = tau;
+  }
+  else
+  {
+    solver->global_discount_integrated = (1. - solver->global_discount_variable) / beta;
+  }
+}
+
+
+/**
+ * Refreshes the cached assigned-bandwidth values of every network entry.
+ *
+ * For each entry in solver->network_entries, the inbound and outbound values
+ * returned by ril_network_get_assigned() are stored in bw_in_assigned and
+ * bw_out_assigned respectively.
+ *
+ * @param solver the solver handle
+ */
+static void
+ril_calculate_assigned_bwnet (struct GAS_RIL_Handle *solver)
+{
+  int idx = 0;
+
+  while (idx < solver->networks_count)
+  {
+    struct RIL_Network *scope = &solver->network_entries[idx];
+
+    //inbound first, then outbound
+    scope->bw_in_assigned = ril_network_get_assigned(solver, scope->type, GNUNET_YES);
+    scope->bw_out_assigned = ril_network_get_assigned(solver, scope->type, GNUNET_NO);
+    idx++;
+  }
+}
+
/**
* Schedules the next global step in an adaptive way. The more resources are
* left, the earlier the next step is scheduled. This serves the reactivity of
double offset;
struct GNUNET_TIME_Relative time_next;
- if (solver->step_task_pending)
- {
- GNUNET_SCHEDULER_cancel (solver->step_next_task_id);
- }
-
used_ratio = ril_get_used_resource_ratio (solver);
GNUNET_assert(
GNUNET_assert(y <= (double ) solver->parameters.step_time_max.rel_value_us);
GNUNET_assert(y >= (double ) solver->parameters.step_time_min.rel_value_us);
- time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned int) y);
+ time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
+
+ if (solver->simulate)
+ {
+ time_next = GNUNET_TIME_UNIT_ZERO;
+ }
- solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
- solver);
- solver->step_task_pending = GNUNET_YES;
+ if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
+ {
+ solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
+ solver);
+ }
}
/**
* Triggers one step per agent
* @param solver
*/
-static int
+static void
ril_step (struct GAS_RIL_Handle *solver)
{
struct RIL_Peer_Agent *cur;
- struct GNUNET_TIME_Absolute time_now;
- struct GNUNET_TIME_Relative time_delta;
- double tau;
if (GNUNET_YES == solver->bulk_lock)
{
solver->bulk_changes++;
- return GNUNET_NO;
+ return;
}
ril_inform (solver, GAS_OP_SOLVE_START, GAS_STAT_SUCCESS);
solver->step_time_last = GNUNET_TIME_absolute_get ();
}
- //calculate tau, i.e. how many real valued time units have passed, one time unit is one minimum time step
- time_now = GNUNET_TIME_absolute_get ();
- time_delta = GNUNET_TIME_absolute_get_difference (solver->step_time_last, time_now);
- tau = ((double) time_delta.rel_value_us)
- / ((double) solver->parameters.step_time_min.rel_value_us);
- solver->step_time_last = time_now;
-
- //calculate reward discounts (once per step for all agents)
- solver->global_discount_variable = pow (M_E, ((-1.) * ((double) solver->parameters.beta) * tau));
- solver->global_discount_integrated = (1 - solver->global_discount_variable)
- / ((double) solver->parameters.beta);
+ ril_calculate_discount (solver);
+ ril_calculate_assigned_bwnet (solver);
//calculate network state vector
- envi_state_networks(solver);
+// envi_state_networks(solver);
//trigger one step per active, unblocked agent
for (cur = solver->agents_head; NULL != cur; cur = cur->next)
}
}
+ ril_calculate_assigned_bwnet (solver);
+
solver->step_count += 1;
ril_step_schedule_next (solver);
}
}
ril_inform (solver, GAS_OP_SOLVE_UPDATE_NOTIFICATION_STOP, GAS_STAT_SUCCESS);
-
- return GNUNET_YES;
}
static int
count = ril_count_agents(agent->envi);
- for (other = agent->envi->agents_head; NULL != other; other = other->next)
+ for (i = 0; i < agent->n; i++)
{
- for (i = 0; i < agent->n; i++)
+ for (k = 0; k < agent->m; k++)
{
- for (k = 0; k < agent->m; k++)
- {
- if (0 == count) {
- agent->W[i][k] = 1;
- }
- else {
+ if (0 == count) {
+ agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
+ }
+ else {
+ for (other = agent->envi->agents_head; NULL != other; other = other->next)
+ {
agent->W[i][k] += (other->W[i][k] / (double) count);
}
-
- GNUNET_assert(!isinf(agent->W[i][k]));
}
+
+ GNUNET_assert(!isinf(agent->W[i][k]));
}
}
}
agent->peer = *peer;
agent->step_count = 0;
agent->is_active = GNUNET_NO;
- agent->bw_in = 1024;
- agent->bw_out = 1024;
+ agent->bw_in = MIN_BW;
+ agent->bw_out = MIN_BW;
agent->suggestion_issue = GNUNET_NO;
agent->n = RIL_ACTION_TYPE_NUM;
- agent->m = (solver->networks_count * RIL_FEATURES_NETWORK_COUNT) + GNUNET_ATS_PreferenceCount;
+ agent->m = (RIL_FEATURES_NETWORK_COUNT);// + GNUNET_ATS_PreferenceCount;
agent->W = (double **) GNUNET_malloc (sizeof (double *) * agent->n);
for (i = 0; i < agent->n; i++)
{
}
agent_w_start(agent);
agent->a_old = RIL_ACTION_INVALID;
- agent->s_old = envi_get_state (solver, agent);
+ agent->s_old = GNUNET_malloc (sizeof (double) * agent->m);
agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
- agent_modify_eligibility (agent, RIL_E_ZERO);
+ agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
return agent;
}
ril_network_is_active (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network_Type network)
{
struct RIL_Network *net;
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
net = ril_get_network (solver, network);
- return net->bw_out_available >= min_bw;
+ return net->bw_out_available >= MIN_BW; // true when the outbound quota reaches MIN_BW, the file-level macro for ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__); NOTE(review): net is dereferenced unchecked — confirm ril_get_network() cannot return NULL here
}
/**
{
solver->parameters.beta = RIL_DEFAULT_DISCOUNT_BETA;
}
+ if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_DISCOUNT_GAMMA", &string)) // optional override read from section "ats"
+ {
+ solver->parameters.gamma = strtod (string, NULL); // NOTE(review): no endptr/errno check, so an unparsable value silently becomes 0.0
+ GNUNET_free (string); // release the string handed back by the configuration lookup
+ }
+ else
+ {
+ solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_GAMMA; // option not available: use the compile-time default
+ }
if (GNUNET_OK
== GNUNET_CONFIGURATION_get_value_string (env->cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &string))
{
{
solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
}
+ if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate)) // NOTE(review): get_value_number stores an unsigned long long — confirm solver->simulate is declared with that type
+ {
+ solver->simulate = GNUNET_NO; // lookup did not succeed: simulation stays off
+ }
env->sf.s_add = &GAS_ril_address_add;
env->sf.s_address_update_property = &GAS_ril_address_property_changed;
solver->network_entries = GNUNET_malloc (env->network_count * sizeof (struct RIL_Network));
solver->step_count = 0;
solver->global_state_networks = GNUNET_malloc (solver->networks_count * RIL_FEATURES_NETWORK_COUNT * sizeof (double));
+ solver->done = GNUNET_NO;
for (c = 0; c < env->network_count; c++)
{
cur->type = env->networks[c];
cur->bw_in_available = env->in_quota[c];
cur->bw_out_available = env->out_quota[c];
- LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Quotas for %s network: IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
}
- solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (
- GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
- &ril_step_scheduler_task, solver);
- solver->step_task_pending = GNUNET_YES;
-
- LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");
- LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Parameters:\n");
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
solver->parameters.algorithm ? "Q" : "SARSA",
solver->parameters.alpha,
solver->parameters.beta,
solver->parameters.lambda);
- LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n",
+ LOG(GNUNET_ERROR_TYPE_INFO, "init() explore = %f, global_share = %f\n",
solver->parameters.explore_ratio,
solver->parameters.reward_global_share);
LOG(GNUNET_ERROR_TYPE_DEBUG, "API_done() Shutting down RIL solver\n");
+ s->done = GNUNET_YES; // flag set at the start of shutdown; NOTE(review): the code reading this flag is not visible in this chunk
+
cur_agent = s->agents_head;
while (NULL != cur_agent)
{
cur_agent = next_agent;
}
- if (s->step_task_pending)
+ if (GNUNET_SCHEDULER_NO_TASK != s->step_next_task_id) // the task id itself now signals a pending step, replacing the removed step_task_pending flag
{
GNUNET_SCHEDULER_cancel (s->step_next_task_id);
}
GNUNET_free(s->global_state_networks);
GNUNET_free(s);
- return NULL ;
+ return NULL; // whitespace-only change
}
/**
unsigned int n_new;
int i;
struct RIL_Network *net;
- uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
LOG(GNUNET_ERROR_TYPE_DEBUG, "API_address_delete() Delete %s%s %s address %s for peer '%s'\n",
session_only ? "session for " : "", address->active ? "active" : "inactive", address->plugin,
m_new = agent->m - RIL_FEATURES_ADDRESS_COUNT;
n_new = agent->n - 1;
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n"); // NOTE(review): bare progress marker; looks like leftover debug tracing — confirm it is meant to stay
+
for (i = 0; i < agent->n; i++)
{
ril_cut_from_vector ((void **) &agent->W[i], sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace, when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
}
GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "second\n"); // NOTE(review): bare progress marker; looks like leftover debug tracing — confirm it is meant to stay
ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
1, agent->n);
//correct last action
agent->a_old = RIL_ACTION_INVALID;
}
//decrease old state vector and eligibility vector
+ LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n"); // NOTE(review): bare progress marker; looks like leftover debug tracing — confirm it is meant to stay
ril_cut_from_vector ((void **) &agent->s_old, sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
ril_cut_from_vector ((void **) &agent->e, sizeof(double),
- ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+ ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
+ (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
agent->m = m_new;
agent->n = n_new;
{
if (NULL != agent->addresses_head) //if peer has an address left, use it
{
- envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, min_bw, min_bw,
+ envi_set_active_suggestion (s, agent, agent->addresses_head->address_naked, MIN_BW, MIN_BW,
GNUNET_NO);
}
else