src/ats/gnunet-service-ats-solver_ril.c

   1 /*
   2  This file is part of GNUnet.
   3  (C) 2011 Christian Grothoff (and other contributing authors)
   4
   5  GNUnet is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published
   7  by the Free Software Foundation; either version 3, or (at your
   8  option) any later version.
   9
  10  GNUnet is distributed in the hope that it will be useful, but
  11  WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with GNUnet; see the file COPYING.  If not, write to the
  17  Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18  Boston, MA 02111-1307, USA.
  19  */
  20
  21 /**
  22  * @file ats/gnunet-service-ats-solver_ril.c
  23  * @brief ATS reinforcement learning solver
  24  * @author Fabian Oehlmann
  25  * @author Matthias Wachs
  26  */
  27 #include "platform.h"
  28 #include "float.h"
  29 #include "gnunet_util_lib.h"
  30 #include "gnunet-service-ats_addresses.h"
  31 #include "gnunet_statistics_service.h"
  32
  33 #define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
  34 #define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q
  35 #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
  36 #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
  37 #define RIL_DEFAULT_TRACE_DECAY 0.6
  38 #define RIL_EXPLORE_RATIO 0.1
  39
  40 /**
  41  * ATS reinforcement learning solver
  42  *
  43  * General description
  44  */
  45
  46 enum RIL_Action_Type
  47 {
  48   RIL_ACTION_BW_IN_DBL = 0,
  49   RIL_ACTION_BW_OUT_DBL = 1,
  50   RIL_ACTION_BW_IN_HLV = 2,
  51   RIL_ACTION_BW_OUT_HLV = 3,
  52   RIL_ACTION_TYPE_NUM = 4
  53 };
  54 //TODO! add the rest of the actions
  55
  56 enum RIL_Algorithm
  57 {
  58   RIL_ALGO_SARSA = 0,
  59   RIL_ALGO_Q = 1
  60 };
  61
  62 enum RIL_E_Modification
  63 {
  64   RIL_E_SET, RIL_E_ZERO, RIL_E_ACCUMULATE, RIL_E_REPLACE
  65 };
  66
  67 /**
  68  * Global learning parameters
  69  */
  70 struct RIL_Learning_Parameters
  71 {
  72   /**
  73    * The TD-algorithm to use
  74    */
  75   enum RIL_Algorithm algorithm;
  76
  77   /**
  78    * Learning discount factor in the TD-update
  79    */
  80   float gamma;
  81
  82   /**
  83    * Gradient-descent step-size
  84    */
  85   float alpha;
  86
  87   /**
  88    * Trace-decay factor for eligibility traces
  89    */
  90   float lambda;
  91 };
  92
  93 struct RIL_Peer_Agent
  94 {
  95   /**
  96    * Next agent in solver's linked list
  97    */
  98   struct RIL_Peer_Agent *next;
  99
 100   /**
 101    * Previous agent in solver's linked list
 102    */
 103   struct RIL_Peer_Agent *prev;
 104
 105   /**
 106    * Environment handle
 107    */
 108   struct GAS_RIL_Handle *envi;
 109
 110   /**
 111    * Peer ID
 112    */
 113   struct GNUNET_PeerIdentity peer;
 114
 115   /**
 116    * Whether the agent is active or not
 117    */
 118   int active;
 119
 120   /**
 121    * Number of performed time-steps
 122    */
 123   unsigned long long step_count;
 124
 125   /**
 126    * Experience matrix W
 127    */
 128   double ** W;
 129
 130   /**
 131    * Number of rows of W / Number of state-vector features
 132    */
 133   int m;
 134
 135   /**
 136    * Number of columns of W / Number of actions
 137    */
 138   int n;
 139
 140   /**
 141    * Last perceived state feature vector
 142    */
 143   double * s_old;
 144
 145   /**
 146    * Last chosen action
 147    */
 148   int a_old;
 149
 150   /**
 151    * Eligibility trace vector
 152    */
 153   double * e;
 154
 155   /**
 156    * Address in use
 157    */
 158   struct ATS_Address * address;
 159
 160   /**
 161    * Inbound bandwidth assigned by the agent
 162    */
 163   unsigned long long bw_in;
 164
 165   /**
 166    * Outbound bandwidth assigned by the agent
 167    */
 168   unsigned long long bw_out;
 169 };
 170
 171 struct RIL_Network
 172 {
 173   /**
 174    * ATS network type
 175    */
 176   enum GNUNET_ATS_Network_Type type;
 177
 178   /**
 179    * Total available inbound bandwidth
 180    */
 181   unsigned long long bw_in_available;
 182
 183   /**
 184    * Total assigned outbound bandwidth
 185    */
 186   unsigned long long bw_in_assigned;
 187
 188   /**
 189    * Total available outbound bandwidth
 190    */
 191   unsigned long long bw_out_available;
 192
 193   /**
 194    * Total assigned outbound bandwidth
 195    */
 196   unsigned long long bw_out_assigned;
 197 };
 198
 199 struct RIL_Callbacks
 200 {
 201   /**
 202    * Bandwidth changed callback
 203    */
 204   GAS_bandwidth_changed_cb bw_changed;
 205
 206   /**
 207    * Bandwidth changed callback cls
 208    */
 209   void *bw_changed_cls;
 210
 211   /**
 212    * ATS function to get preferences
 213    */
 214   GAS_get_preferences get_preferences;
 215
 216   /**
 217    * Closure for ATS function to get preferences
 218    */
 219   void *get_preferences_cls;
 220
 221   /**
 222    * ATS function to get properties
 223    */
 224   GAS_get_properties get_properties;
 225
 226   /**
 227    * Closure for ATS function to get properties
 228    */
 229   void *get_properties_cls;
 230 };
 231
 232 /**
 233  * A handle for the reinforcement learning solver
 234  */
 235 struct GAS_RIL_Handle
 236 {
 237   /**
 238    * Statistics handle
 239    */
 240   struct GNUNET_STATISTICS_Handle *stats;
 241
 242   /**
 243    * Hashmap containing all valid addresses
 244    */
 245   const struct GNUNET_CONTAINER_MultiHashMap *addresses;
 246
 247   /**
 248    * Callbacks for the solver
 249    */
 250   struct RIL_Callbacks *callbacks;
 251
 252   /**
 253    * Bulk lock
 254    */
 255   int bulk_lock;
 256
 257   /**
 258    * Number of changes while solver was locked
 259    */
 260   int bulk_requests;
 261
 262   /**
 263    * Number of performed time-steps
 264    */
 265   unsigned long long step_count;
 266
 267   /**
 268    * Interval time between steps in milliseconds //TODO? put in agent
 269    */
 270   struct GNUNET_TIME_Relative step_time;
 271
 272   /**
 273    * Task identifier of the next time-step to be executed //TODO? put in agent
 274    */
 275   GNUNET_SCHEDULER_TaskIdentifier next_step;
 276
 277   /**
 278    * Learning parameters
 279    */
 280   struct RIL_Learning_Parameters parameters;
 281
 282   /**
 283    * Array of networks with global assignment state
 284    */
 285   struct RIL_Network * network_entries;
 286
 287   /**
 288    * Networks count
 289    */
 290   unsigned int networks_count;
 291
 292   /**
 293    * List of active peer-agents
 294    */
 295   struct RIL_Peer_Agent * agents_head;
 296   struct RIL_Peer_Agent * agents_tail;
 297 };
 298
 299 /**
 300  *  Private functions
 301  *  ---------------------------
 302  */
 303
 304 /**
 305  * Estimate the current action-value for state s and action a
 306  * @param agent agent performing the estimation
 307  * @param state s
 308  * @param action a
 309  * @return estimation value
 310  */
 311 static double
 312 agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
 313 {
 314   int i;
 315   double result = 0;
 316
 317   for (i = 0; i < agent->m; i++)
 318   {
 319     result += state[i] * agent->W[action][i];
 320   }
 321
 322   return result;
 323 }
 324
 325 /**
 326  * Decide whether to do exploration (i.e. taking a new action) or exploitation (i.e. taking the
 327  * currently estimated best action) in the current step
 328  * @param agent agent performing the step
 329  * @return yes, if exploring
 330  */
 331 static int
 332 agent_decide_exploration (struct RIL_Peer_Agent *agent)
 333 {
 334   double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
 335       UINT32_MAX) / (double) UINT32_MAX;
 336
 337   if (r < RIL_EXPLORE_RATIO)
 338   {
 339     return GNUNET_YES;
 340   }
 341   return GNUNET_NO;
 342 }
 343
 344 /**
 345  * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
 346  * most reward in the future)
 347  * @param agent agent performing the calculation
 348  * @param state the state from which to take the action
 349  * @return the action promising most future reward
 350  */
 351 static int
 352 agent_get_action_best (struct RIL_Peer_Agent *agent, double *state)
 353 {
 354   int i;
 355   int max_i = -1;
 356   double cur_q;
 357   double max_q = -DBL_MAX;
 358
 359   for (i = 0; i < agent->n; i++)
 360   {
 361     cur_q = agent_estimate_q (agent, state, i);
 362     if (cur_q > max_q)
 363     {
 364       max_q = cur_q;
 365       max_i = i;
 366     }
 367   }
 368
 369   GNUNET_assert(-1 != max_i);
 370
 371   return max_i;
 372 }
 373
 374 /**
 375  * Gets any action, to explore the action space from that state
 376  * @param agent agent performing the calculation
 377  * @param state the state from which to take the action
 378  * @return any action
 379  */
 380 static int
 381 agent_get_action_explore (struct RIL_Peer_Agent *agent, double *state)
 382 {
 383   return GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
 384 }
 385
 386 /**
 387  * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
 388  * @param agent the agent performing the update
 389  * @param reward the reward received for the last action
 390  * @param s_next the new state, the last step got the agent into
 391  * @param a_prime the new
 392  */
 393 static void
 394 agent_update_weights (struct RIL_Peer_Agent *agent,
 395     double reward,
 396     double *s_next,
 397     int a_prime)
 398 {
 399   int i;
 400   double delta;
 401   double *theta = agent->W[agent->a_old];
 402
 403   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "agent_update_weights() MUH a_old = %d\n", agent->a_old);
 404   delta = reward + agent_estimate_q (agent, s_next, a_prime)
 405       - agent_estimate_q (agent, agent->s_old, agent->a_old);
 406   for (i = 0; i < agent->m; i++)
 407   {
 408     theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i];
 409   }
 410 }
 411
 412 /**
 413  * Changes the eligibility trace vector e in various manners:
 414  * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
 415  * RIL_E_REPLACE - resets each component to 1 as in replacing traces
 416  * RIL_E_SET - multiplies e with gamma and lambda as in the update rule
 417  * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
 418  * @param agent
 419  * @param mod
 420  */
 421 static void
 422 agent_modify_eligibility (struct RIL_Peer_Agent *agent,
 423     enum RIL_E_Modification mod)
 424 {
 425   int i;
 426   double *e = agent->e;
 427   double gamma = agent->envi->parameters.gamma;
 428   double lambda = agent->envi->parameters.lambda;
 429
 430   for (i = 0; i < agent->m; i++)
 431   {
 432     switch (mod)
 433     {
 434     case RIL_E_ACCUMULATE:
 435       e[i] += 1;
 436       break;
 437     case RIL_E_REPLACE:
 438       e[i] = 1;
 439       break;
 440     case RIL_E_SET:
 441       e[i] = gamma * lambda;
 442       break;
 443     case RIL_E_ZERO:
 444       e[i] = 0;
 445       break;
 446     }
 447   }
 448 }
 449
 450 /**
 451  * Allocates a state vector and fills it with the features present
 452  * @param solver the solver handle
 453  * @return pointer to the state vector
 454  */
 455 static double *
 456 envi_get_state (struct GAS_RIL_Handle *solver)
 457 {
 458   int i;
 459   struct RIL_Network *net;
 460   double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
 461
 462   for (i = 0; i < solver->networks_count; i++)
 463   {
 464     net = &solver->network_entries[i];
 465     state[i*4 + 0] = (double) net->bw_in_assigned;
 466     state[i*4 + 1] = (double) net->bw_in_available;
 467     state[i*4 + 2] = (double) net->bw_out_assigned;
 468     state[i*4 + 3] = (double) net->bw_out_available;
 469   }
 470
 471   return state;
 472 }
 473
 474 /**
 475  * Gets the reward of the last performed step
 476  * @param solver solver handle
 477  * @return the reward
 478  */
 479 static double
 480 envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 481 {
 482   //TODO! implement reward calculation
 483
 484   return (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
 485       UINT32_MAX) / (double) UINT32_MAX;
 486 }
 487
 488 static void
 489 envi_action_bw_double (struct GAS_RIL_Handle *solver,
 490     struct RIL_Peer_Agent *agent,
 491     int direction_in)
 492 {
 493   if (direction_in)
 494   {
 495     agent->bw_in *= 2;
 496     agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 497     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 498         agent->address);
 499   }
 500   else
 501   {
 502     agent->bw_out *= 2;
 503     agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 504     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 505         agent->address);
 506   }
 507 }
 508
 509 static void
 510 envi_action_bw_halven (struct GAS_RIL_Handle *solver,
 511     struct RIL_Peer_Agent *agent,
 512     int direction_in)
 513 {
 514   if ((direction_in && 1 == agent->bw_in)
 515       || (!direction_in && 1 == agent->bw_out))
 516   {
 517     return;
 518   }
 519   if (direction_in)
 520   {
 521     agent->bw_in /= 2;
 522     agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 523     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 524         agent->address);
 525   }
 526   else
 527   {
 528     agent->bw_out /= 2;
 529     agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 530     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 531         agent->address);
 532   }
 533 }
 534
 535 /**
 536  * Puts the action into effect
 537  * @param solver solver handle
 538  * @param action action to perform by the solver
 539  */
 540 static void
 541 envi_do_action (struct GAS_RIL_Handle *solver,
 542     struct RIL_Peer_Agent *agent,
 543     int action)
 544 {
 545   switch (action)
 546   {
 547   case RIL_ACTION_BW_IN_DBL:
 548     envi_action_bw_double (solver, agent, GNUNET_YES);
 549     break;
 550   case RIL_ACTION_BW_IN_HLV:
 551     envi_action_bw_halven (solver, agent, GNUNET_YES);
 552     break;
 553   case RIL_ACTION_BW_OUT_DBL:
 554     envi_action_bw_double (solver, agent, GNUNET_NO);
 555     break;
 556   case RIL_ACTION_BW_OUT_HLV:
 557     envi_action_bw_halven (solver, agent, GNUNET_NO);
 558     break;
 559   }
 560 }
 561
 562 /**
 563  * Performs one step of the Markov Decision Process. Other than in the literature the step starts
 564  * after having done the last action a_old. It observes the new state s_next and the reward
 565  * received. Then the coefficient update is done according to the SARSA or Q-learning method. The
 566  * next action is put into effect.
 567  * @param agent the agent performing the step
 568  */
 569 static void
 570 agent_step (struct RIL_Peer_Agent *agent)
 571 {
 572   int a_next = -1;
 573   double *s_next;
 574   double reward;
 575
 576   s_next = envi_get_state (agent->envi);
 577   reward = envi_get_reward (agent->envi, agent);
 578
 579   GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "agent_step() with algorithm %s\n",
 580       agent->envi->parameters.algorithm ? "Q" : "SARSA");
 581
 582   switch (agent->envi->parameters.algorithm)
 583   {
 584   case RIL_ALGO_SARSA:
 585     agent_modify_eligibility (agent, RIL_E_SET);
 586     if (agent_decide_exploration (agent))
 587     {
 588       a_next = agent_get_action_explore (agent, s_next);
 589     }
 590     else
 591     {
 592       a_next = agent_get_action_best (agent, s_next);
 593     }
 594     //updates weights with selected action (on-policy), if not first step
 595     if (-1 != agent->a_old)
 596       agent_update_weights (agent, reward, s_next, a_next);
 597     break;
 598
 599   case RIL_ALGO_Q:
 600     //updates weights with best action, disregarding actually selected action (off-policy), if not first step
 601     a_next = agent_get_action_best (agent, s_next);
 602     if (-1 != agent->a_old)
 603       agent_update_weights (agent, reward, s_next, a_next);
 604     if (agent_decide_exploration (agent))
 605     {
 606       a_next = agent_get_action_explore (agent, s_next);
 607       agent_modify_eligibility (agent, RIL_E_ZERO);
 608     }
 609     else
 610     {
 611       a_next = agent_get_action_best (agent, s_next);
 612       agent_modify_eligibility (agent, RIL_E_SET);
 613     }
 614     break;
 615   }
 616
 617   GNUNET_assert(-1 != a_next);
 618
 619   agent_modify_eligibility (agent, RIL_E_ACCUMULATE);
 620
 621   envi_do_action (agent->envi, agent, a_next);
 622
 623   GNUNET_free(agent->s_old);
 624   agent->s_old = s_next;
 625   agent->a_old = a_next;
 626
 627   agent->step_count += 1;
 628 }
 629
 630 /**
 631  * Cycles through all agents and lets the active ones do a step. Schedules the next step.
 632  * @param solver the solver handle
 633  * @param tc task context for the scheduler
 634  */
 635 static void
 636 ril_periodic_step (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
 637 {
 638   struct GAS_RIL_Handle *solver = cls;
 639   struct RIL_Peer_Agent *cur;
 640
 641   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n",
 642       solver->step_count);
 643
 644   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 645   {
 646     if (cur->active)
 647     {
 648       agent_step (cur);
 649     }
 650   }
 651
 652   solver->step_count += 1;
 653   solver->next_step = GNUNET_SCHEDULER_add_delayed (solver->step_time,
 654       &ril_periodic_step, solver);
 655 }
 656
 657 /**
 658  * Initialize an agent without addresses and its knowledge base
 659  * @param s ril solver
 660  * @param peer the one in question
 661  * @return handle to the new agent
 662  */
 663 static struct RIL_Peer_Agent *
 664 agent_init (void *s, const struct GNUNET_PeerIdentity *peer)
 665 {
 666   int i;
 667   struct GAS_RIL_Handle * solver = s;
 668   struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
 669
 670   agent->envi = solver;
 671   agent->peer = *peer;
 672   agent->step_count = 0;
 673   agent->active = GNUNET_NO;
 674   agent->s_old = envi_get_state (solver);
 675   agent->n = RIL_ACTION_TYPE_NUM;
 676   agent->m = solver->networks_count * 4;
 677   agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n);
 678   for (i = 0; i < agent->n; i++)
 679   {
 680     agent->W[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 681   }
 682   agent->a_old = -1;
 683   agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 684   agent_modify_eligibility (agent, RIL_E_ZERO);
 685
 686   GNUNET_CONTAINER_DLL_insert_tail(solver->agents_head, solver->agents_tail,
 687       agent);
 688
 689   return agent;
 690 }
 691
 692 /**
 693  * Deallocate agent
 694  * @param s solver handle
 695  * @param agent the agent to retire
 696  */
 697 static void
 698 agent_die (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 699 {
 700   int i;
 701
 702   for (i = 0; i < agent->n; i++)
 703   {
 704     GNUNET_free(agent->W[i]);
 705   }
 706   GNUNET_free(agent->W);
 707   GNUNET_free(agent->e);
 708   GNUNET_free(agent->s_old);
 709 }
 710
 711 static void
 712 ril_remove_agent (struct GAS_RIL_Handle *s, struct RIL_Peer_Agent *agent)
 713 {
 714   struct RIL_Peer_Agent *cur_agent;
 715   struct RIL_Peer_Agent *next_agent;
 716
 717   cur_agent = s->agents_head;
 718   while (NULL != cur_agent)
 719   {
 720     next_agent = cur_agent->next;
 721
 722     if (agent == cur_agent)
 723       GNUNET_CONTAINER_DLL_remove(s->agents_head, s->agents_tail, cur_agent);
 724       agent_die (s, cur_agent);
 725
 726     cur_agent = next_agent;
 727   }
 728 }
 729
 730 /**
 731  * Counts the (active) agents
 732  * @param solver solver handle
 733  * @param active_only whether only active agents should be counted
 734  * @return number of agents
 735  */
 736 static int
 737 ril_count_agents (struct GAS_RIL_Handle *solver, int active_only)
 738 {
 739   int c;
 740   struct RIL_Peer_Agent *cur;
 741
 742   c = 0;
 743   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 744   {
 745     if ((!active_only) || (active_only && cur->active))
 746     {
 747       c += 1;
 748     }
 749   }
 750   return c;
 751 }
 752
 753 /**
 754  * Returns the agent for a peer
 755  * @param s solver handle
 756  * @param peer identity of the peer
 757  * @param create whether to create an agent if none is allocated yet
 758  * @return agent
 759  */
 760 static struct RIL_Peer_Agent *
 761 ril_get_agent (struct GAS_RIL_Handle *solver,
 762     const struct GNUNET_PeerIdentity *peer,
 763     int create)
 764 {
 765   struct RIL_Peer_Agent *cur;
 766
 767   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 768   {
 769     if (0 == GNUNET_CRYPTO_hash_cmp (&peer->hashPubKey, &cur->peer.hashPubKey))
 770     {
 771       return cur;
 772     }
 773   }
 774
 775   if (create)
 776     return agent_init (solver, peer);
 777   return NULL;
 778 }
 779
 780 static int
 781 ril_network_is_active (struct RIL_Network *network)
 782 {
 783   uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
 784
 785   if (network->bw_out_available < min_bw)
 786     return GNUNET_NO;
 787   return GNUNET_YES;
 788 }
 789
 790 /**
 791  * Iterator, which allocates one agent per peer
 792  *
 793  * @param cls solver
 794  * @param key peer identity
 795  * @param value address
 796  * @return whether iterator should continue
 797  */
 798 static int
 799 ril_init_agents_it (void *cls, const struct GNUNET_HashCode *key, void *value)
 800 {
 801   struct GAS_RIL_Handle *solver = cls;
 802   struct ATS_Address *address = value;
 803   struct RIL_Peer_Agent *agent = NULL;
 804   uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
 805
 806   if (ril_network_is_active(address->solver_information))
 807   {
 808     agent = ril_get_agent (solver, &address->peer, GNUNET_YES);
 809
 810     GNUNET_assert(NULL != agent);
 811
 812     if (NULL == agent->address)
 813     {
 814       agent->address = address;
 815       agent->address->active = GNUNET_YES;
 816       agent->bw_in = min_bw;
 817       agent->address->assigned_bw_in.value__ = htonl (min_bw);
 818       agent->bw_out = min_bw;
 819       agent->address->assigned_bw_out.value__ = htonl (min_bw);
 820     }
 821   }
 822   return GNUNET_YES;
 823 }
 824
 825 /**
 826  * Lookup network struct by type
 827  *
 828  * @param s the solver handle
 829  * @param type the network type
 830  * @return the network struct
 831  */
 832 static struct RIL_Network *
 833 ril_get_network (struct GAS_RIL_Handle *s, uint32_t type)
 834 {
 835   int i;
 836
 837   for (i = 0; i < s->networks_count; i++)
 838   {
 839     if (s->network_entries[i].type == type) {
 840       return &s->network_entries[i];
 841     }
 842   }
 843   return NULL;
 844 }
 845
 846 /**
 847  *  Solver API functions
 848  *  ---------------------------
 849  */
 850
 851 /**
 852  * Changes the preferences for a peer in the problem
 853  *
 854  * @param solver the solver handle
 855  * @param peer the peer to change the preference for
 856  * @param kind the kind to change the preference
 857  * @param pref_rel the normalized preference value for this kind over all clients
 858  */
 859 void
 860 GAS_ril_address_change_preference (void *s,
 861     const struct GNUNET_PeerIdentity *peer,
 862     enum GNUNET_ATS_PreferenceKind kind,
 863     double pref_rel)
 864 {
 865   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
 866       "API_address_change_preference() Preference `%s' for peer `%s' changed to %.2f \n",
 867       GNUNET_ATS_print_preference_type (kind), GNUNET_i2s (peer), pref_rel);
 868   /*
 869    * Nothing to do here. Preferences are considered during reward calculation.
 870    */
 871 }
 872
 873 /**
 874  * Init the reinforcement learning problem solver
 875  *
 876  * Quotas:
 877  * network[i] contains the network type as type GNUNET_ATS_NetworkType[i]
 878  * out_quota[i] contains outbound quota for network type i
 879  * in_quota[i] contains inbound quota for network type i
 880  *
 881  * Example
 882  * network = {GNUNET_ATS_NET_UNSPECIFIED, GNUNET_ATS_NET_LOOPBACK, GNUNET_ATS_NET_LAN, GNUNET_ATS_NET_WAN, GNUNET_ATS_NET_WLAN}
 883  * network[2]   == GNUNET_ATS_NET_LAN
 884  * out_quota[2] == 65353
 885  * in_quota[2]  == 65353
 886  *
 887  * @param cfg configuration handle
 888  * @param stats the GNUNET_STATISTICS handle
 889  * @param network array of GNUNET_ATS_NetworkType with length dest_length
 890  * @param addresses hashmap containing all addresses
 891  * @param out_quota array of outbound quotas
 892  * @param in_quota array of outbound quota
 893  * @param dest_length array length for quota arrays
 894  * @param bw_changed_cb callback for changed bandwidth amounts
 895  * @param bw_changed_cb_cls cls for callback
 896  * @param get_preference callback to get relative preferences for a peer
 897  * @param get_preference_cls cls for callback to get relative preferences
 898  * @param get_properties_cls for callback to get relative properties
 899  * @param get_properties_cls cls for callback to get relative properties
 900  * @return handle for the solver on success, NULL on fail
 901  */
 902 void *
 903 GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg,
 904     const struct GNUNET_STATISTICS_Handle *stats,
 905     const struct GNUNET_CONTAINER_MultiHashMap *addresses,
 906     int *network,
 907     unsigned long long *out_quota,
 908     unsigned long long *in_quota,
 909     int dest_length,
 910     GAS_bandwidth_changed_cb bw_changed_cb,
 911     void *bw_changed_cb_cls,
 912     GAS_get_preferences get_preference,
 913     void *get_preference_cls,
 914     GAS_get_properties get_properties,
 915     void *get_properties_cls)
 916 {
 917   int c;
 918   unsigned long long tmp;
 919   char *string;
 920   struct RIL_Network * cur;
 921   struct GAS_RIL_Handle *solver = GNUNET_malloc (sizeof (struct GAS_RIL_Handle));
 922
 923   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_init() Initializing RIL solver\n");
 924
 925   GNUNET_assert(NULL != cfg);
 926   GNUNET_assert(NULL != stats);
 927   GNUNET_assert(NULL != network);
 928   GNUNET_assert(NULL != bw_changed_cb);
 929   GNUNET_assert(NULL != get_preference);
 930   GNUNET_assert(NULL != get_properties);
 931
 932   if (GNUNET_OK
 933       != GNUNET_CONFIGURATION_get_value_time (cfg, "ats", "RIL_STEP_TIME",
 934           &solver->step_time))
 935   {
 936     solver->step_time = RIL_DEFAULT_STEP_TIME;
 937   }
 938   if (GNUNET_OK
 939       == GNUNET_CONFIGURATION_get_value_string (cfg, "ats", "RIL_ALGORITHM",
 940           &string) && NULL != string && 0 == strcmp (string, "SARSA"))
 941   {
 942     solver->parameters.algorithm = RIL_ALGO_SARSA;
 943   }
 944   else
 945   {
 946     solver->parameters.algorithm = RIL_DEFAULT_ALGORITHM;
 947   }
 948   if (GNUNET_OK
 949       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats", "RIL_DISCOUNT_FACTOR",
 950           &tmp))
 951   {
 952     solver->parameters.gamma = (double) tmp / 100;
 953   }
 954   else
 955   {
 956     solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR;
 957   }
 958   if (GNUNET_OK
 959       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats",
 960           "RIL_GRADIENT_STEP_SIZE", &tmp))
 961   {
 962     solver->parameters.alpha = (double) tmp / 100;
 963   }
 964   else
 965   {
 966     solver->parameters.alpha = RIL_DEFAULT_GRADIENT_STEP_SIZE;
 967   }
 968   if (GNUNET_OK
 969       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats", "RIL_TRACE_DECAY",
 970           &tmp))
 971   {
 972     solver->parameters.lambda = (double) tmp / 100;
 973   }
 974   else
 975   {
 976     solver->parameters.lambda = RIL_DEFAULT_TRACE_DECAY;
 977   }
 978
 979   solver->stats = (struct GNUNET_STATISTICS_Handle *) stats;
 980   solver->callbacks = GNUNET_malloc (sizeof (struct RIL_Callbacks));
 981   solver->callbacks->bw_changed = bw_changed_cb;
 982   solver->callbacks->bw_changed_cls = bw_changed_cb_cls;
 983   solver->callbacks->get_preferences = get_preference;
 984   solver->callbacks->get_preferences_cls = get_preference_cls;
 985   solver->callbacks->get_properties = get_properties;
 986   solver->callbacks->get_properties_cls = get_properties_cls;
 987   solver->networks_count = dest_length;
 988   solver->network_entries =
 989       GNUNET_malloc (dest_length * sizeof (struct RIL_Network));
 990   solver->bulk_lock = GNUNET_NO;
 991   solver->addresses = addresses;
 992   solver->step_count = 0;
 993
 994   for (c = 0; c < dest_length; c++)
 995   {
 996     cur = &solver->network_entries[c];
 997     cur->type = network[c];
 998     cur->bw_in_available = in_quota[c];
 999     cur->bw_in_assigned = 0;
1000     cur->bw_out_available = out_quota[c];
1001     cur->bw_out_assigned = 0;
1002   }
1003
1004   solver->next_step = GNUNET_SCHEDULER_add_delayed (
1005       GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (),
1006           1000), &ril_periodic_step, solver);
1007
1008   return solver;
1009 }
1010
1011 /**
1012  * Shutdown the reinforcement learning problem solver
1013  *
1014  * @param solver the respective handle to shutdown
1015  */
1016 void
1017 GAS_ril_done (void * solver)
1018 {
1019   struct GAS_RIL_Handle *s = solver;
1020   struct RIL_Peer_Agent *cur_agent;
1021   struct RIL_Peer_Agent *next_agent;
1022
1023   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_done() Shutting down RIL solver\n");
1024
1025   cur_agent = s->agents_head;
1026   while (NULL != cur_agent)
1027   {
1028     next_agent = cur_agent->next;
1029     GNUNET_CONTAINER_DLL_remove(s->agents_head, s->agents_tail, cur_agent);
1030     agent_die (s, cur_agent);
1031     cur_agent = next_agent;
1032   }
1033
1034   GNUNET_SCHEDULER_cancel (s->next_step);
1035   GNUNET_free(s->callbacks);
1036   GNUNET_free(s->network_entries);
1037   GNUNET_free(s);
1038 }
1039
1040 /**
1041  * Add a single address within a network to the solver
1042  *
1043  * @param solver the solver Handle
1044  * @param address the address to add
1045  * @param network network type of this address
1046  */
1047 void
1048 GAS_ril_address_add (void *solver,
1049     struct ATS_Address *address,
1050     uint32_t network)
1051 {
1052   struct GAS_RIL_Handle *s = solver;
1053   //TODO! implement solver address add
1054   /*
1055    * if (new peer)
1056    *     initialize new agent
1057    * Add address
1058    * increase state vector
1059    * knowledge matrix
1060    * and action vector
1061    */
1062
1063   address->solver_information = ril_get_network(s, network);
1064
1065   /*
1066    * reiterate all addresses, create new agent if necessary and give the agent the address
1067    */
1068   GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it,
1069       solver);
1070
1071   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1072       "API_address_add() Added %s address for peer '%s'\n", address->plugin,
1073       GNUNET_i2s (&address->peer));
1074 }
1075
1076 /**
1077  * Remove an address from the solver
1078  *
1079  * @param solver the solver handle
1080  * @param address the address to remove
1081  * @param session_only delete only session not whole address
1082  */
1083 void
1084 GAS_ril_address_delete (void *solver,
1085     struct ATS_Address *address,
1086     int session_only)
1087 {
1088   //TODO! implement solver address delete
1089   //TODO! delete session only
1090   /*
1091    * remove address
1092    * if (last address of peer)
1093    *     remove agent
1094    * else
1095    *     decrease state vector
1096    *     decrease knowledge matrix
1097    *     decrease action vector
1098    */
1099   struct GAS_RIL_Handle *s = solver;
1100   struct RIL_Peer_Agent *agent;
1101
1102   agent = ril_get_agent (s, &address->peer, GNUNET_NO);
1103
1104   if (NULL == agent)
1105   {
1106     GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_address_delete() deleting address for unallocated agent");
1107     return;
1108   }
1109
1110   if (0 == memcmp (agent->address->addr, address->addr, address->addr_len)) //if used address deleted
1111   {
1112     agent->address = NULL; //delete address
1113     GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it,
1114         solver); //put another address
1115     if (NULL == agent->address) //no other address available
1116     {
1117       agent->active = GNUNET_NO;
1118       ril_remove_agent (solver, agent);
1119     }
1120   }
1121
1122   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1123       "API_address_delete() Deleted %s%s address for peer '%s'\n",
1124       session_only ? "session for " : "", address->plugin,
1125       GNUNET_i2s (&address->peer));
1126 }
1127
1128 /**
1129  * Transport properties for this address have changed
1130  *
1131  * @param solver solver handle
1132  * @param address the address
1133  * @param type the ATSI type in HBO
1134  * @param abs_value the absolute value of the property
1135  * @param rel_value the normalized value
1136  */
1137 void
1138 GAS_ril_address_property_changed (void *solver,
1139     struct ATS_Address *address,
1140     uint32_t type,
1141     uint32_t abs_value,
1142     double rel_value)
1143 {
1144   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1145       "API_address_property_changed() Property `%s' for peer `%s' address %p changed "
1146           "to %.2f \n", GNUNET_ATS_print_property_type (type),
1147       GNUNET_i2s (&address->peer), address, rel_value);
1148   /*
1149    * Nothing to do here, properties are considered in every reward calculation
1150    */
1151 }
1152
1153 /**
1154  * Transport session for this address has changed
1155  *
1156  * NOTE: values in addresses are already updated
1157  *
1158  * @param solver solver handle
1159  * @param address the address
1160  * @param cur_session the current session
1161  * @param new_session the new session
1162  */
1163 void
1164 GAS_ril_address_session_changed (void *solver,
1165     struct ATS_Address *address,
1166     uint32_t cur_session,
1167     uint32_t new_session)
1168 {
1169   //TODO? consider session changed in solver behaviour
1170   /*
1171    * Potentially add session activity as a feature in state vector
1172    */
1173   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_address_session_changed()\n");
1174 }
1175
1176 /**
1177  * Usage for this address has changed
1178  *
1179  * NOTE: values in addresses are already updated
1180  *
1181  * @param solver solver handle
1182  * @param address the address
1183  * @param in_use usage state
1184  */
1185 void
1186 GAS_ril_address_inuse_changed (void *solver,
1187     struct ATS_Address *address,
1188     int in_use)
1189 {
1190   //TODO! consider address_inuse_changed according to matthias' email
1191   /**
1192    * See matthias' email
1193    */
1194   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1195       "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n",
1196       address->plugin, GNUNET_i2s (&address->peer),
1197       (GNUNET_YES == in_use) ? "USED" : "UNUSED");
1198 }
1199
1200 /**
1201  * Network scope for this address has changed
1202  *
1203  * NOTE: values in addresses are already updated
1204  *
1205  * @param solver solver handle
1206  * @param address the address
1207  * @param current_network the current network
1208  * @param new_network the new network
1209  */
1210 void
1211 GAS_ril_address_change_network (void *solver,
1212     struct ATS_Address *address,
1213     uint32_t current_network,
1214     uint32_t new_network)
1215 {
1216   struct GAS_RIL_Handle *s = solver;
1217   struct RIL_Peer_Agent *agent;
1218   struct RIL_Network *net;
1219
1220   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1221       "API_address_change_network() Network type changed, moving "
1222           "%s address of peer %s from `%s' to `%s'\n",
1223       (GNUNET_YES == address->active) ? "active" : "inactive",
1224       GNUNET_i2s (&address->peer),
1225       GNUNET_ATS_print_network_type (current_network),
1226       GNUNET_ATS_print_network_type (new_network));
1227
1228   address->solver_information = ril_get_network(solver, new_network);
1229
1230   if (address->active)
1231   {
1232     agent = ril_get_agent(solver, &address->peer, GNUNET_NO);
1233
1234     //remove from old network
1235     net = ril_get_network (s, current_network);
1236     net->bw_in_assigned -= agent->bw_in;
1237     net->bw_out_assigned -= agent->bw_out;
1238
1239     if (ril_network_is_active(ril_get_network(s, new_network)))
1240     {
1241       //add to new network
1242       net = ril_get_network (s, new_network);
1243       net->bw_in_assigned += agent->bw_in;
1244       net->bw_out_assigned += agent->bw_out;
1245
1246       GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1247           "API_address_change_network() Moved %d inbound and %d "
1248               "outbound\n", agent->bw_in, agent->bw_out);
1249     }
1250     else //new network for this address is not active => address must not be considered
1251     {
1252       address->active = GNUNET_NO;
1253       agent->address = NULL; //delete address
1254           GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it,
1255               solver); //put another address
1256       if (NULL == agent->address) //no other address available
1257       {
1258         agent->active = GNUNET_NO;
1259         ril_remove_agent(s, agent);
1260       }
1261     }
1262   }
1263 }
1264
1265 /**
1266  * Get application feedback for a peer
1267  *
1268  * @param solver the solver handle
1269  * @param application the application
1270  * @param peer the peer to change the preference for
1271  * @param scope the time interval for this feedback: [now - scope .. now]
1272  * @param kind the kind to change the preference
1273  * @param score the score
1274  */
1275 void
1276 GAS_ril_address_preference_feedback (void *solver,
1277     void *application,
1278     const struct GNUNET_PeerIdentity *peer,
1279     const struct GNUNET_TIME_Relative scope,
1280     enum GNUNET_ATS_PreferenceKind kind,
1281     double score)
1282 {
1283   //TODO! collect reward until next reward calculation
1284   //TODO! Find out application
1285   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1286       "API_address_preference_feedback() Peer '%s' got a feedback of %+.3f from application %s for "
1287           "preference %s for %d seconds\n", GNUNET_i2s (peer), "UNKNOWN",
1288       GNUNET_ATS_print_preference_type (kind), scope.rel_value_us / 1000000);
1289 }
1290
1291 /**
1292  * Start a bulk operation
1293  *
1294  * @param solver the solver
1295  */
1296 void
1297 GAS_ril_bulk_start (void *solver)
1298 {
1299   //TODO? consideration: keep bulk counter and stop agents during bulk
1300   /*
1301    * bulk counter up, but not really relevant, because there is no complete calculation of the
1302    * bandwidth assignment triggered anyway. Therefore, changes to addresses can come and go as
1303    * they want. Consideration: Step-pause during bulk-start-stop period...
1304    */
1305
1306   //GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_bulk_start()\n");
1307 }
1308
1309 /**
1310  * Bulk operation done
1311  */
1312 void
1313 GAS_ril_bulk_stop (void *solver)
1314 {
1315   //TODO? consideration: keep bulk counter and stop agents during bulk
1316   /*
1317    * bulk counter down, see bulk_start()
1318    */
1319
1320   //GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_bulk_stop()\n");
1321 }
1322
1323 /**
1324  * Get the preferred address for a specific peer
1325  *
1326  * @param solver the solver handle
1327  * @param peer the identity of the peer
1328  */
1329 const struct ATS_Address *
1330 GAS_ril_get_preferred_address (void *solver,
1331     const struct GNUNET_PeerIdentity *peer)
1332 {
1333   /*
1334    * activate agent, return currently chosen address
1335    */
1336   struct GAS_RIL_Handle *s = solver;
1337   struct RIL_Peer_Agent *agent;
1338
1339   agent = ril_get_agent (s, peer, GNUNET_NO);
1340
1341   if (NULL == agent)
1342   {
1343     return NULL;
1344   }
1345
1346   agent->active = GNUNET_YES;
1347
1348   GNUNET_assert(NULL != agent->address);
1349
1350   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1351       "API_get_preferred_address() Activated agent for peer '%s' with %s address\n",
1352       GNUNET_i2s (peer), agent->address->plugin);
1353
1354   return agent->address;
1355 }
1356
1357 /**
1358  * Stop notifying about address and bandwidth changes for this peer
1359  *
1360  * @param solver the solver handle
1361  * @param peer the peer
1362  */
1363 void
1364 GAS_ril_stop_get_preferred_address (void *solver,
1365     const struct GNUNET_PeerIdentity *peer)
1366 {
1367   struct GAS_RIL_Handle *s = solver;
1368   struct RIL_Peer_Agent *agent;
1369
1370   agent = ril_get_agent (s, peer, GNUNET_NO);
1371   agent->active = GNUNET_NO;
1372
1373   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1374       "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n",
1375       GNUNET_i2s (peer), agent->address->plugin);
1376 }
1377
1378 /* end of gnunet-service-ats-solver_ril.c */