added simulation
[oweals/gnunet.git] / src / ats / libgnunet_plugin_ats_ril.c
index 9403975b8ea2472cb294ca1d46c6ce30ff5339d5..603e93a4d7863e7ad744ad933839eae94e8b9830 100755 (executable)
@@ -45,6 +45,8 @@
 #define RIL_DEFAULT_EXPLORE_RATIO 0.1
 #define RIL_DEFAULT_GLOBAL_REWARD_SHARE 0.5
 
+#define RIL_INC_DEC_STEP_SIZE 1 /* multiplier of MIN_BW applied by one bandwidth increase/decrease action */
+
 /**
  * ATS reinforcement learning solver
  *
@@ -68,7 +70,7 @@ enum RIL_Action_Type
   RIL_ACTION_BW_OUT_HLV = -5,
   RIL_ACTION_BW_OUT_INC = -6,
   RIL_ACTION_BW_OUT_DEC = -7,
-  RIL_ACTION_TYPE_NUM = 2
+  RIL_ACTION_TYPE_NUM = 1
 };
 
 enum RIL_Algorithm
@@ -363,6 +365,11 @@ struct GAS_RIL_Handle
    * Shutdown
    */
   int done;
+
+  /**
+   * Simulation switch, read from option RIL_SIMULATE in section "ats" (GNUNET_NO if unset); if non-zero, steps are scheduled with zero delay
+   */
+  unsigned long long simulate;
 };
 
 /*
@@ -396,8 +403,7 @@ agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
 
   if (isinf(result))
   {
-    GNUNET_assert(GNUNET_NO);
-    return isinf(result) * (DBL_MAX / 2); //TODO! fix
+    return ((result < 0) ? -1.0 : 1.0) * UINT32_MAX; //TODO! fix; sign comes from result: isinf() only guarantees non-zero and int * UINT32_MAX wraps as unsigned
   }
 
   return result;
@@ -537,13 +543,14 @@ agent_update_weights (struct RIL_Peer_Agent *agent, double reward, double *s_nex
   delta += agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime); //discounted future value
   delta -= agent_estimate_q (agent, agent->s_old, agent->a_old); //one step
 
-//  LOG(GNUNET_ERROR_TYPE_INFO, "Y*r = %f   y*Q(s+1,a+1) = %f   Q(s,a) = %f\n, y = %f\n",
-//      agent->envi->global_discount_integrated * reward,
-//      agent->envi->global_discount_variable * agent_estimate_q (agent, s_next, a_prime),
-//      agent_estimate_q (agent, agent->s_old, agent->a_old),
-//      agent->envi->global_discount_variable);
-//
-//  LOG(GNUNET_ERROR_TYPE_INFO, "delta = %f\n", delta);
+  LOG(GNUNET_ERROR_TYPE_INFO, "update()   Step# %llu  Q(s,a): %f  a: %f  r: %f  y: %f  Q(s+1,a+1) = %f  delta: %f\n",
+      agent->step_count,
+      agent_estimate_q (agent, agent->s_old, agent->a_old),
+      agent->envi->parameters.alpha,
+      reward,
+      agent->envi->global_discount_variable,
+      agent_estimate_q (agent, s_next, a_prime),
+      delta);
 
   for (i = 0; i < agent->m; i++)
   {
@@ -715,27 +722,27 @@ ril_network_get_assigned (struct GAS_RIL_Handle *solver, enum GNUNET_ATS_Network
   return sum;
 }
 
-static void
-envi_state_networks (struct GAS_RIL_Handle *solver)
-{
-  int i;
-  struct RIL_Network net;
-  int overutilized_in;
-  int overutilized_out;
-
-  for (i = 0; i < solver->networks_count; i++)
-  {
-    net = solver->network_entries[i];
-
-    overutilized_in = net.bw_in_assigned > net.bw_in_available;
-    overutilized_out = net.bw_out_assigned > net.bw_out_available;
-
-    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
-    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
-    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
-    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
-  }
-}
+//static void
+//envi_state_networks (struct GAS_RIL_Handle *solver)
+//{
+//  int i;
+//  struct RIL_Network net;
+//  int overutilized_in;
+//  int overutilized_out;
+//
+//  for (i = 0; i < solver->networks_count; i++)
+//  {
+//    net = solver->network_entries[i];
+//
+//    overutilized_in = net.bw_in_assigned > net.bw_in_available;
+//    overutilized_out = net.bw_out_assigned > net.bw_out_available;
+//
+//    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 0] = ((double) net.bw_in_assigned / (double) net.bw_in_available)*10;
+//    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 1] = (double) overutilized_in;
+//    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 2] = ((double) net.bw_out_assigned / (double) net.bw_out_available)*10;
+//    solver->global_state_networks[i * RIL_FEATURES_NETWORK_COUNT + 3] = (double) overutilized_out;
+//  }
+//}
 
 /**
  * Allocates a state vector and fills it with the features present
@@ -764,17 +771,17 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
   state[0] = (double) net->bw_in_assigned / 1024; //(double) net->bw_in_available;
   if (net->bw_in_assigned > net->bw_in_available)
   {
-    state[1] = (double)(net->bw_in_assigned - net->bw_in_available) / 1024;// net->bw_in_available;
+    state[1] = 1;// net->bw_in_available;
   }
   else
   {
     state[1] = 0;
   }
-  LOG(GNUNET_ERROR_TYPE_INFO, "state[0] = %f\n", state[0]);
-  LOG(GNUNET_ERROR_TYPE_INFO, "state[1] = %f\n", state[1]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  state[0] = %f\n", state[0]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  state[1] = %f\n", state[1]);
 
-  LOG(GNUNET_ERROR_TYPE_INFO, "W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
-  LOG(GNUNET_ERROR_TYPE_INFO, "W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  W / %08.3f %08.3f \\ \n", agent->W[0][0], agent->W[1][0]);
+  LOG(GNUNET_ERROR_TYPE_INFO, "get_state()  W \\ %08.3f %08.3f / \n", agent->W[0][1], agent->W[1][1]);
 
 
   //get peer features
@@ -803,140 +810,140 @@ envi_get_state (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
   return state;
 }
 
-/**
- * For all networks a peer has an address in, this gets the maximum bandwidth which could
- * theoretically be available in one of the networks. This is used for bandwidth normalization.
- *
- * @param agent the agent handle
- * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
- */
-static unsigned long long
-ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
-{
-  /*
-   * get the maximum bandwidth possible for a peer, e.g. among all addresses which addresses'
-   * network could provide the maximum bandwidth if all that bandwidth was used on that one peer.
-   */
-  unsigned long long max = 0;
-  struct RIL_Address_Wrapped *cur;
-  struct RIL_Network *net;
-
-  for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
-  {
-    net = cur->address_naked->solver_information;
-    if (direction_in)
-    {
-      if (net->bw_in_available > max)
-      {
-        max = net->bw_in_available;
-      }
-    }
-    else
-    {
-      if (net->bw_out_available > max)
-      {
-        max = net->bw_out_available;
-      }
-    }
-  }
-  return max;
-}
-
-/**
- * Get the index of the quality-property in question
- *
- * @param type the quality property type
- * @return the index
- */
-static int
-ril_find_property_index (uint32_t type)
-{
-  int existing_types[] = GNUNET_ATS_QualityProperties;
-  int c;
-  for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
-    if (existing_types[c] == type)
-      return c;
-  return GNUNET_SYSERR;
-}
-
-static int
-ril_get_atsi (struct ATS_Address *address, uint32_t type)
-{
-  int c1;
-  GNUNET_assert(NULL != address);
-
-  if ((NULL == address->atsi) || (0 == address->atsi_count))
-    return 0;
-
-  for (c1 = 0; c1 < address->atsi_count; c1++)
-  {
-    if (ntohl (address->atsi[c1].type) == type)
-      return ntohl (address->atsi[c1].value);
-  }
-  return 0;
-}
-
-static double
-envi_reward_global (struct GAS_RIL_Handle *solver)
-{
-  int i;
-  struct RIL_Network net;
-  unsigned int sum_in_available = 0;
-  unsigned int sum_out_available = 0;
-  unsigned int sum_in_assigned = 0;
-  unsigned int sum_out_assigned = 0;
-  double ratio_in;
-  double ratio_out;
-
-  for (i = 0; i < solver->networks_count; i++)
-  {
-    net = solver->network_entries[i];
-    sum_in_available += net.bw_in_available;
-    sum_in_assigned += net.bw_in_assigned;
-    sum_out_available += net.bw_out_available;
-    sum_out_assigned += net.bw_out_assigned;
-  }
-
-  ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
-  ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
-
-  // global reward in [1,2]
-  return ratio_in +1;
-  return ((ratio_in + ratio_out) / 2) + 1;
-}
-
-static double
-envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
-{
-  const double *preferences;
-  const double *properties;
-  int prop_index;
-  double pref_match = 0;
-  double bw_norm;
-  double dl_norm;
-
-  preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
-      &agent->peer);
-  properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
-      agent->address_inuse);
-
-  // delay in [0,1]
-  prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
-  dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
-
-  // utilization in [0,1]
-  bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
-      / (double) ril_get_max_bw (agent, GNUNET_YES))
-      + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
-          / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
-
-  // preference matching in [0,4]
-  pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
-  pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
-
-  // local reward in [1,2]
-  return (pref_match / 4) +1;
-}
+///**
+// * For all networks a peer has an address in, this gets the maximum bandwidth which could
+// * theoretically be available in one of the networks. This is used for bandwidth normalization.
+// *
+// * @param agent the agent handle
+// * @param direction_in whether the inbound bandwidth should be considered. Returns the maximum outbound bandwidth if GNUNET_NO
+// */
+//static unsigned long long
+//ril_get_max_bw (struct RIL_Peer_Agent *agent, int direction_in)
+//{
+//  /*
+//   * get the maximum bandwidth possible for a peer, e.g. among all addresses which addresses'
+//   * network could provide the maximum bandwidth if all that bandwidth was used on that one peer.
+//   */
+//  unsigned long long max = 0;
+//  struct RIL_Address_Wrapped *cur;
+//  struct RIL_Network *net;
+//
+//  for (cur = agent->addresses_head; NULL != cur; cur = cur->next)
+//  {
+//    net = cur->address_naked->solver_information;
+//    if (direction_in)
+//    {
+//      if (net->bw_in_available > max)
+//      {
+//        max = net->bw_in_available;
+//      }
+//    }
+//    else
+//    {
+//      if (net->bw_out_available > max)
+//      {
+//        max = net->bw_out_available;
+//      }
+//    }
+//  }
+//  return max;
+//}
+
+///**
+// * Get the index of the quality-property in question
+// *
+// * @param type the quality property type
+// * @return the index
+// */
+//static int
+//ril_find_property_index (uint32_t type)
+//{
+//  int existing_types[] = GNUNET_ATS_QualityProperties;
+//  int c;
+//  for (c = 0; c < GNUNET_ATS_QualityPropertiesCount; c++)
+//    if (existing_types[c] == type)
+//      return c;
+//  return GNUNET_SYSERR;
+//}
+
+//static int
+//ril_get_atsi (struct ATS_Address *address, uint32_t type)
+//{
+//  int c1;
+//  GNUNET_assert(NULL != address);
+//
+//  if ((NULL == address->atsi) || (0 == address->atsi_count))
+//    return 0;
+//
+//  for (c1 = 0; c1 < address->atsi_count; c1++)
+//  {
+//    if (ntohl (address->atsi[c1].type) == type)
+//      return ntohl (address->atsi[c1].value);
+//  }
+//  return 0;
+//}
+
+//static double
+//envi_reward_global (struct GAS_RIL_Handle *solver)
+//{
+//  int i;
+//  struct RIL_Network net;
+//  unsigned int sum_in_available = 0;
+//  unsigned int sum_out_available = 0;
+//  unsigned int sum_in_assigned = 0;
+//  unsigned int sum_out_assigned = 0;
+//  double ratio_in;
+//  double ratio_out;
+//
+//  for (i = 0; i < solver->networks_count; i++)
+//  {
+//    net = solver->network_entries[i];
+//    sum_in_available += net.bw_in_available;
+//    sum_in_assigned += net.bw_in_assigned;
+//    sum_out_available += net.bw_out_available;
+//    sum_out_assigned += net.bw_out_assigned;
+//  }
+//
+//  ratio_in = ((double) sum_in_assigned) / ((double) sum_in_available);
+//  ratio_out = ((double) sum_out_assigned) / ((double) sum_out_available);
+//
+//  // global reward in [1,2]
+//  return ratio_in +1;
+//  return ((ratio_in + ratio_out) / 2) + 1;
+//}
+
+//static double
+//envi_reward_local (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
+//{
+//  const double *preferences;
+//  const double *properties;
+//  int prop_index;
+//  double pref_match = 0;
+//  double bw_norm;
+//  double dl_norm;
+//
+//  preferences = solver->plugin_envi->get_preferences (solver->plugin_envi->get_preference_cls,
+//      &agent->peer);
+//  properties = solver->plugin_envi->get_property (solver->plugin_envi->get_property_cls,
+//      agent->address_inuse);
+//
+//  // delay in [0,1]
+//  prop_index = ril_find_property_index (GNUNET_ATS_QUALITY_NET_DELAY);
+//  dl_norm = 2 - properties[prop_index]; //invert property as we want to maximize for lower latencies
+//
+//  // utilization in [0,1]
+//  bw_norm = (((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_IN)
+//      / (double) ril_get_max_bw (agent, GNUNET_YES))
+//      + ((double) ril_get_atsi (agent->address_inuse, GNUNET_ATS_UTILIZATION_OUT)
+//          / (double) ril_get_max_bw (agent, GNUNET_NO))) / 2;
+//
+//  // preference matching in [0,4]
+//  pref_match += (preferences[GNUNET_ATS_PREFERENCE_LATENCY] * dl_norm);
+//  pref_match += (preferences[GNUNET_ATS_PREFERENCE_BANDWIDTH] * bw_norm);
+//
+//  // local reward in [1,2]
+//  return (pref_match / 4) +1;
+//}
 
 /**
  * Gets the reward for the last performed step, which is calculated in equal
@@ -1072,7 +1079,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
 
   if (direction_in)
   {
-    new_bw = agent->bw_in + (1 * MIN_BW);
+    new_bw = agent->bw_in + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
     if (new_bw < agent->bw_in || new_bw > GNUNET_ATS_MaxBandwidth)
       new_bw = GNUNET_ATS_MaxBandwidth;
     envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw,
@@ -1080,7 +1087,7 @@ envi_action_bw_inc (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
   }
   else
   {
-    new_bw = agent->bw_out + (1 * MIN_BW);
+    new_bw = agent->bw_out + (RIL_INC_DEC_STEP_SIZE * MIN_BW);
     if (new_bw < agent->bw_out || new_bw > GNUNET_ATS_MaxBandwidth)
       new_bw = GNUNET_ATS_MaxBandwidth;
     envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in,
@@ -1104,7 +1111,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
 
   if (direction_in)
   {
-    new_bw = agent->bw_in - (1 * MIN_BW);
+    new_bw = agent->bw_in - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
     if (new_bw < MIN_BW || new_bw > agent->bw_in)
       new_bw = MIN_BW;
     envi_set_active_suggestion (solver, agent, agent->address_inuse, new_bw, agent->bw_out,
@@ -1112,7 +1119,7 @@ envi_action_bw_dec (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent,
   }
   else
   {
-    new_bw = agent->bw_out - (1 * MIN_BW);
+    new_bw = agent->bw_out - (RIL_INC_DEC_STEP_SIZE * MIN_BW);
     if (new_bw < MIN_BW || new_bw > agent->bw_out)
       new_bw = MIN_BW;
     envi_set_active_suggestion (solver, agent, agent->address_inuse, agent->bw_in, new_bw,
@@ -1220,6 +1227,7 @@ static void
 agent_step (struct RIL_Peer_Agent *agent)
 {
   int a_next = RIL_ACTION_INVALID;
+  int explore;
   double *s_next;
   double reward;
 
@@ -1229,22 +1237,12 @@ agent_step (struct RIL_Peer_Agent *agent)
 
   s_next = envi_get_state (agent->envi, agent);
   reward = envi_get_reward (agent->envi, agent);
-
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu:   A: %d   R: %f  IN %llu   OUT %llu\n",
-      agent->step_count,
-      agent->a_old,
-      reward,
-      agent->bw_in/1024,
-      agent->bw_out/1024);
-  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "Agent step %llu:   Best A: %d   Q(s,a): %f \n",
-        agent->step_count,
-        agent_get_action_best (agent, s_next),
-        agent_estimate_q(agent, s_next, agent_get_action_best (agent, s_next)));
+  explore = agent_decide_exploration (agent);
 
   switch (agent->envi->parameters.algorithm)
   {
   case RIL_ALGO_SARSA:
-    if (agent_decide_exploration (agent))
+    if (explore)
     {
       a_next = agent_get_action_explore (agent, s_next);
     }
@@ -1267,7 +1265,7 @@ agent_step (struct RIL_Peer_Agent *agent)
       //updates weights with best action, disregarding actually selected action (off-policy), if not first step
       agent_update_weights (agent, reward, s_next, a_next);
     }
-    if (agent_decide_exploration (agent))
+    if (explore)
     {
       a_next = agent_get_action_explore (agent, s_next);
       agent_modify_eligibility (agent, RIL_E_ZERO, NULL);
@@ -1284,6 +1282,13 @@ agent_step (struct RIL_Peer_Agent *agent)
 
   agent_modify_eligibility (agent, RIL_E_ACCUMULATE, s_next);
 
+  GNUNET_log (GNUNET_ERROR_TYPE_INFO, "step()  Step# %llu  R: %f  IN %llu  OUT %llu  A: %d\n",
+        agent->step_count,
+        reward,
+        agent->bw_in/1024,
+        agent->bw_out/1024,
+        a_next);
+
   envi_do_action (agent->envi, agent, a_next);
 
   GNUNET_free(agent->s_old);
@@ -1416,7 +1421,7 @@ ril_calculate_discount (struct GAS_RIL_Handle *solver)
   double tau;
 
   // MDP case - remove when debugged
-  if (solver->parameters.step_time_min.rel_value_us == solver->parameters.step_time_max.rel_value_us)
+  if (solver->simulate)
   {
     solver->global_discount_variable = solver->parameters.gamma;
     solver->global_discount_integrated = 1;
@@ -1484,6 +1489,11 @@ ril_step_schedule_next (struct GAS_RIL_Handle *solver)
 
   time_next = GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MICROSECONDS, (unsigned long long) y);
 
+  if (solver->simulate)
+  {
+    time_next = GNUNET_TIME_UNIT_ZERO;
+  }
+
   if ((GNUNET_SCHEDULER_NO_TASK == solver->step_next_task_id) && (GNUNET_NO == solver->done))
   {
     solver->step_next_task_id = GNUNET_SCHEDULER_add_delayed (time_next, &ril_step_scheduler_task,
@@ -1583,7 +1593,7 @@ agent_w_start (struct RIL_Peer_Agent *agent)
     for (k = 0; k < agent->m; k++)
     {
       if (0 == count) {
-        agent->W[i][k] = 1.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
+        agent->W[i][k] = 1;//.1 - ((double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX/5)/(double)UINT32_MAX);
       }
       else {
         for (other = agent->envi->agents_head; NULL != other; other = other->next)
@@ -1878,6 +1888,10 @@ libgnunet_plugin_ats_ril_init (void *cls)
   {
     solver->parameters.reward_global_share = RIL_DEFAULT_GLOBAL_REWARD_SHARE;
   }
+  if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_number (env->cfg, "ats", "RIL_SIMULATE", &solver->simulate))
+  {
+    solver->simulate = GNUNET_NO;
+  }
 
   env->sf.s_add = &GAS_ril_address_add;
   env->sf.s_address_update_property = &GAS_ril_address_property_changed;
@@ -1905,16 +1919,16 @@ libgnunet_plugin_ats_ril_init (void *cls)
     cur->type = env->networks[c];
     cur->bw_in_available = env->in_quota[c];
     cur->bw_out_available = env->out_quota[c];
-    LOG(GNUNET_ERROR_TYPE_INFO, "Quotas for %s network:  IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
+    LOG(GNUNET_ERROR_TYPE_INFO, "init()  Quotas for %s network:  IN %llu - OUT %llu\n", GNUNET_ATS_print_network_type(cur->type), cur->bw_in_available/1024, cur->bw_out_available/1024);
   }
 
-  LOG(GNUNET_ERROR_TYPE_INFO, "Parameters:\n");
-  LOG(GNUNET_ERROR_TYPE_INFO, "Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  Parameters:\n");
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  Algorithm = %s, alpha = %f, beta = %f, lambda = %f\n",
       solver->parameters.algorithm ? "Q" : "SARSA",
       solver->parameters.alpha,
       solver->parameters.beta,
       solver->parameters.lambda);
-  LOG(GNUNET_ERROR_TYPE_INFO, "explore = %f, global_share = %f\n",
+  LOG(GNUNET_ERROR_TYPE_INFO, "init()  explore = %f, global_share = %f\n",
       solver->parameters.explore_ratio,
       solver->parameters.reward_global_share);
 
@@ -2092,13 +2106,17 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
   m_new = agent->m - RIL_FEATURES_ADDRESS_COUNT;
   n_new = agent->n - 1;
 
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "first\n");
+
   for (i = 0; i < agent->n; i++)
   {
     ril_cut_from_vector ((void **) &agent->W[i], sizeof(double),
-        ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+        //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+        ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace, when adding more networks
             + (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
   }
   GNUNET_free(agent->W[RIL_ACTION_TYPE_NUM + address_index]);
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "second\n");
   ril_cut_from_vector ((void **) &agent->W, sizeof(double *), RIL_ACTION_TYPE_NUM + address_index,
       1, agent->n);
   //correct last action
@@ -2111,11 +2129,14 @@ GAS_ril_address_delete (void *solver, struct ATS_Address *address, int session_o
     agent->a_old = RIL_ACTION_INVALID;
   }
   //decrease old state vector and eligibility vector
+  LOG(GNUNET_ERROR_TYPE_DEBUG, "third\n");
   ril_cut_from_vector ((void **) &agent->s_old, sizeof(double),
-      ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+      //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+      ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
           + (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
   ril_cut_from_vector ((void **) &agent->e, sizeof(double),
-      ((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+      //((s->networks_count * RIL_FEATURES_NETWORK_COUNT)
+      ((RIL_FEATURES_NETWORK_COUNT) //TODO! replace when adding more networks
           + (address_index * RIL_FEATURES_ADDRESS_COUNT)), RIL_FEATURES_ADDRESS_COUNT, agent->m);
   agent->m = m_new;
   agent->n = n_new;