src/ats/gnunet-service-ats-solver_ril.c

   1 /*
   2  This file is part of GNUnet.
   3  (C) 2011 Christian Grothoff (and other contributing authors)
   4
   5  GNUnet is free software; you can redistribute it and/or modify
   6  it under the terms of the GNU General Public License as published
   7  by the Free Software Foundation; either version 3, or (at your
   8  option) any later version.
   9
  10  GNUnet is distributed in the hope that it will be useful, but
  11  WITHOUT ANY WARRANTY; without even the implied warranty of
  12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  General Public License for more details.
  14
  15  You should have received a copy of the GNU General Public License
  16  along with GNUnet; see the file COPYING.  If not, write to the
  17  Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18  Boston, MA 02111-1307, USA.
  19  */
  20
  21 /**
  22  * @file ats/gnunet-service-ats-solver_ril.c
  23  * @brief ATS reinforcement learning solver
  24  * @author Fabian Oehlmann
  25  * @author Matthias Wachs
  26  */
  27 #include "platform.h"
  28 #include "float.h"
  29 #include "gnunet_util_lib.h"
  30 #include "gnunet-service-ats_addresses.h"
  31 #include "gnunet_statistics_service.h"
  32
  33 #define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
  34 #define RIL_DEFAULT_ALGORITHM RIL_ALGO_Q
  35 #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
  36 #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
  37 #define RIL_DEFAULT_TRACE_DECAY 0.6
  38 #define RIL_EXPLORE_RATIO 0.1
  39
  40 /**
  41  * ATS reinforcement learning solver
  42  *
  43  * General description
  44  */
  45
  46 enum RIL_Action_Type
  47 {
  48   RIL_ACTION_BW_IN_DBL = 0,
  49   RIL_ACTION_BW_OUT_DBL = 1,
  50   RIL_ACTION_BW_IN_HLV = 2,
  51   RIL_ACTION_BW_OUT_HLV = 3,
  52   RIL_ACTION_TYPE_NUM = 4
  53 };
  54 //TODO! add the rest of the actions
  55
  56 enum RIL_Algorithm
  57 {
  58   RIL_ALGO_SARSA, RIL_ALGO_Q
  59 };
  60
  61 enum RIL_E_Modification
  62 {
  63   RIL_E_SET, RIL_E_ZERO, RIL_E_ACCUMULATE, RIL_E_REPLACE
  64 };
  65
  66 /**
  67  * Global learning parameters
  68  */
  69 struct RIL_Learning_Parameters
  70 {
  71   /**
  72    * The TD-algorithm to use
  73    */
  74   enum RIL_Algorithm algorithm;
  75
  76   /**
  77    * Learning discount factor in the TD-update
  78    */
  79   float gamma;
  80
  81   /**
  82    * Gradient-descent step-size
  83    */
  84   float alpha;
  85
  86   /**
  87    * Trace-decay factor for eligibility traces
  88    */
  89   float lambda;
  90 };
  91
  92 struct RIL_Peer_Agent
  93 {
  94   /**
  95    * Next agent in solver's linked list
  96    */
  97   struct RIL_Peer_Agent *next;
  98
  99   /**
 100    * Previous agent in solver's linked list
 101    */
 102   struct RIL_Peer_Agent *prev;
 103
 104   /**
 105    * Environment handle
 106    */
 107   struct GAS_RIL_Handle *envi;
 108
 109   /**
 110    * Peer ID
 111    */
 112   struct GNUNET_PeerIdentity peer;
 113
 114   /**
 115    * Whether the agent is active or not
 116    */
 117   int active;
 118
 119   /**
 120    * Number of performed time-steps
 121    */
 122   unsigned long long step_count;
 123
 124   /**
 125    * Experience matrix W
 126    */
 127   double ** W;
 128
 129   /**
 130    * Number of rows of W / Number of state-vector features
 131    */
 132   int m;
 133
 134   /**
 135    * Number of columns of W / Number of actions
 136    */
 137   int n;
 138
 139   /**
 140    * Last perceived state feature vector
 141    */
 142   double * s_old;
 143
 144   /**
 145    * Last chosen action
 146    */
 147   int a_old;
 148
 149   /**
 150    * Eligibility trace vector
 151    */
 152   double * e;
 153
 154   /**
 155    * Address in use
 156    */
 157   struct ATS_Address * address;
 158
 159   /**
 160    * Inbound bandwidth assigned by the agent
 161    */
 162   unsigned long long bw_in;
 163
 164   /**
 165    * Outbound bandwidth assigned by the agent
 166    */
 167   unsigned long long bw_out;
 168 };
 169
 170 struct RIL_Network
 171 {
 172   /**
 173    * ATS network type
 174    */
 175   enum GNUNET_ATS_Network_Type type;
 176
 177   /**
 178    * Total available inbound bandwidth
 179    */
 180   unsigned long long bw_in_available;
 181
 182   /**
 183    * Total assigned outbound bandwidth
 184    */
 185   unsigned long long bw_in_assigned;
 186
 187   /**
 188    * Total available outbound bandwidth
 189    */
 190   unsigned long long bw_out_available;
 191
 192   /**
 193    * Total assigned outbound bandwidth
 194    */
 195   unsigned long long bw_out_assigned;
 196 };
 197
 198 struct RIL_Callbacks
 199 {
 200   /**
 201    * Bandwidth changed callback
 202    */
 203   GAS_bandwidth_changed_cb bw_changed;
 204
 205   /**
 206    * Bandwidth changed callback cls
 207    */
 208   void *bw_changed_cls;
 209
 210   /**
 211    * ATS function to get preferences
 212    */
 213   GAS_get_preferences get_preferences;
 214
 215   /**
 216    * Closure for ATS function to get preferences
 217    */
 218   void *get_preferences_cls;
 219
 220   /**
 221    * ATS function to get properties
 222    */
 223   GAS_get_properties get_properties;
 224
 225   /**
 226    * Closure for ATS function to get properties
 227    */
 228   void *get_properties_cls;
 229 };
 230
 231 /**
 232  * A handle for the reinforcement learning solver
 233  */
 234 struct GAS_RIL_Handle
 235 {
 236   /**
 237    * Statistics handle
 238    */
 239   struct GNUNET_STATISTICS_Handle *stats;
 240
 241   /**
 242    * Hashmap containing all valid addresses
 243    */
 244   const struct GNUNET_CONTAINER_MultiHashMap *addresses;
 245
 246   /**
 247    * Callbacks for the solver
 248    */
 249   struct RIL_Callbacks *callbacks;
 250
 251   /**
 252    * Bulk lock
 253    */
 254   int bulk_lock;
 255
 256   /**
 257    * Number of changes while solver was locked
 258    */
 259   int bulk_requests;
 260
 261   /**
 262    * Number of performed time-steps
 263    */
 264   unsigned long long step_count;
 265
 266   /**
 267    * Interval time between steps in milliseconds //TODO? put in agent
 268    */
 269   struct GNUNET_TIME_Relative step_time;
 270
 271   /**
 272    * Task identifier of the next time-step to be executed //TODO? put in agent
 273    */
 274   GNUNET_SCHEDULER_TaskIdentifier next_step;
 275
 276   /**
 277    * Learning parameters
 278    */
 279   struct RIL_Learning_Parameters parameters;
 280
 281   /**
 282    * Array of networks with global assignment state
 283    */
 284   struct RIL_Network * network_entries;
 285
 286   /**
 287    * Networks count
 288    */
 289   unsigned int networks_count;
 290
 291   /**
 292    * List of active peer-agents
 293    */
 294   struct RIL_Peer_Agent * agents_head;
 295   struct RIL_Peer_Agent * agents_tail;
 296 };
 297
 298 /**
 299  *  Private functions
 300  *  ---------------------------
 301  */
 302
 303 /**
 304  * Estimate the current action-value for state s and action a
 305  * @param agent agent performing the estimation
 306  * @param state s
 307  * @param action a
 308  * @return estimation value
 309  */
 310 static double
 311 agent_estimate_q (struct RIL_Peer_Agent *agent, double *state, int action)
 312 {
 313   int i;
 314   double result = 0;
 315
 316   for (i = 0; i < agent->m; i++)
 317   {
 318     result += state[i] * (agent->W)[agent->m][action];
 319   }
 320
 321   return result;
 322 }
 323
 324 /**
 325  * Decide whether to do exploration (i.e. taking a new action) or exploitation (i.e. taking the
 326  * currently estimated best action) in the current step
 327  * @param agent agent performing the step
 328  * @return yes, if exploring
 329  */
 330 static int
 331 agent_decide_exploration (struct RIL_Peer_Agent *agent)
 332 {
 333   double r = (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
 334       UINT32_MAX) / (double) UINT32_MAX;
 335
 336   if (r < RIL_EXPLORE_RATIO)
 337   {
 338     return GNUNET_YES;
 339   }
 340   return GNUNET_NO;
 341 }
 342
 343 /**
 344  * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
 345  * most reward in the future)
 346  * @param agent agent performing the calculation
 347  * @param state the state from which to take the action
 348  * @return the action promising most future reward
 349  */
 350 static int
 351 agent_get_action_best (struct RIL_Peer_Agent *agent, double *state)
 352 {
 353   int i;
 354   int max_i = -1;
 355   double cur_q;
 356   double max_q = DBL_MIN;
 357
 358   for (i = 0; i < agent->n; i++)
 359   {
 360     cur_q = agent_estimate_q (agent, state, i);
 361     if (cur_q > max_q)
 362     {
 363       max_q = cur_q;
 364       max_i = i;
 365     }
 366   }
 367
 368   GNUNET_assert(-1 != max_i);
 369
 370   return max_i;
 371 }
 372
 373 /**
 374  * Gets any action, to explore the action space from that state
 375  * @param agent agent performing the calculation
 376  * @param state the state from which to take the action
 377  * @return any action
 378  */
 379 static int
 380 agent_get_action_explore (struct RIL_Peer_Agent *agent, double *state)
 381 {
 382   return GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
 383 }
 384
 385 /**
 386  * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
 387  * @param agent the agent performing the update
 388  * @param reward the reward received for the last action
 389  * @param s_next the new state, the last step got the agent into
 390  * @param a_prime the new
 391  */
 392 static void
 393 agent_update_weights (struct RIL_Peer_Agent *agent,
 394     double reward,
 395     double *s_next,
 396     int a_prime)
 397 {
 398   int i;
 399   double delta;
 400   double *theta = (agent->W)[agent->a_old];
 401
 402   delta = reward + agent_estimate_q (agent, s_next, a_prime)
 403       - agent_estimate_q (agent, agent->s_old, agent->a_old);
 404   for (i = 0; i < agent->m; i++)
 405   {
 406     theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i];
 407   }
 408 }
 409
 410 /**
 411  * Changes the eligibility trace vector e in various manners:
 412  * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
 413  * RIL_E_REPLACE - resets each component to 1 as in replacing traces
 414  * RIL_E_SET - multiplies e with gamma and lambda as in the update rule
 415  * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
 416  * @param agent
 417  * @param mod
 418  */
 419 static void
 420 agent_modify_eligibility (struct RIL_Peer_Agent *agent,
 421     enum RIL_E_Modification mod)
 422 {
 423   int i;
 424   double *e = agent->e;
 425   double gamma = agent->envi->parameters.gamma;
 426   double lambda = agent->envi->parameters.lambda;
 427
 428   for (i = 0; i < agent->m; i++)
 429   {
 430     switch (mod)
 431     {
 432     case RIL_E_ACCUMULATE:
 433       e[i] += 1;
 434       break;
 435     case RIL_E_REPLACE:
 436       e[i] = 1;
 437       break;
 438     case RIL_E_SET:
 439       e[i] = gamma * lambda;
 440       break;
 441     case RIL_E_ZERO:
 442       e[i] = 0;
 443       break;
 444     }
 445   }
 446 }
 447
 448 /**
 449  * Allocates a state vector and fills it with the features present
 450  * @param solver the solver handle
 451  * @return pointer to the state vector
 452  */
 453 static double *
 454 envi_get_state (struct GAS_RIL_Handle *solver)
 455 {
 456   int i;
 457   struct RIL_Network *net;
 458   double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
 459
 460   for (i = 0; i < solver->networks_count; i += 4)
 461   {
 462     net = (&solver->network_entries)[i];
 463     state[i] = (double) net->bw_in_assigned;
 464     state[i + 1] = (double) net->bw_in_available;
 465     state[i + 2] = (double) net->bw_out_assigned;
 466     state[i + 3] = (double) net->bw_out_available;
 467   }
 468
 469   return state;
 470 }
 471
 472 /**
 473  * Gets the reward of the last performed step
 474  * @param solver solver handle
 475  * @return the reward
 476  */
 477 static double
 478 envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 479 {
 480   //TODO! implement reward calculation
 481
 482   return (double) GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
 483       UINT32_MAX) / (double) UINT32_MAX;
 484 }
 485
 486 static void
 487 envi_action_bw_double (struct GAS_RIL_Handle *solver,
 488     struct RIL_Peer_Agent *agent,
 489     int direction_in)
 490 {
 491   if (direction_in)
 492   {
 493     agent->bw_in *= 2;
 494     agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 495     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 496         agent->address);
 497   }
 498   else
 499   {
 500     agent->bw_out *= 2;
 501     agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 502     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 503         agent->address);
 504   }
 505 }
 506
 507 static void
 508 envi_action_bw_halven (struct GAS_RIL_Handle *solver,
 509     struct RIL_Peer_Agent *agent,
 510     int direction_in)
 511 {
 512   if ((direction_in && 1 == agent->bw_in)
 513       || (!direction_in && 1 == agent->bw_out))
 514   {
 515     return;
 516   }
 517   if (direction_in)
 518   {
 519     agent->bw_in /= 2;
 520     agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 521     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 522         agent->address);
 523   }
 524   else
 525   {
 526     agent->bw_out /= 2;
 527     agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 528     solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls,
 529         agent->address);
 530   }
 531 }
 532
 533 /**
 534  * Puts the action into effect
 535  * @param solver solver handle
 536  * @param action action to perform by the solver
 537  */
 538 static void
 539 envi_do_action (struct GAS_RIL_Handle *solver,
 540     struct RIL_Peer_Agent *agent,
 541     int action)
 542 {
 543   switch (action)
 544   {
 545   case RIL_ACTION_BW_IN_DBL:
 546     envi_action_bw_double (solver, agent, GNUNET_YES);
 547     break;
 548   case RIL_ACTION_BW_IN_HLV:
 549     envi_action_bw_halven (solver, agent, GNUNET_YES);
 550     break;
 551   case RIL_ACTION_BW_OUT_DBL:
 552     envi_action_bw_double (solver, agent, GNUNET_NO);
 553     break;
 554   case RIL_ACTION_BW_OUT_HLV:
 555     envi_action_bw_halven (solver, agent, GNUNET_NO);
 556     break;
 557   }
 558 }
 559
 560 /**
 561  * Performs one step of the Markov Decision Process. Other than in the literature the step starts
 562  * after having done the last action a_old. It observes the new state s_next and the reward
 563  * received. Then the coefficient update is done according to the SARSA or Q-learning method. The
 564  * next action is put into effect.
 565  * @param agent the agent performing the step
 566  */
 567 static void
 568 agent_step (struct RIL_Peer_Agent *agent)
 569 {
 570   int a_next = -1;
 571   double *s_next;
 572   double reward;
 573
 574   s_next = envi_get_state (agent->envi);
 575   reward = envi_get_reward (agent->envi, agent);
 576
 577   switch (agent->envi->parameters.algorithm)
 578   {
 579   case RIL_ALGO_SARSA:
 580     agent_modify_eligibility (agent, RIL_E_SET);
 581     if (agent_decide_exploration (agent))
 582     {
 583       a_next = agent_get_action_explore (agent, s_next);
 584     }
 585     else
 586     {
 587       a_next = agent_get_action_best (agent, s_next);
 588     }
 589     //updates weights with selected action (on-policy)
 590     agent_update_weights (agent, reward, s_next, a_next);
 591     break;
 592
 593   case RIL_ALGO_Q:
 594     //updates weights with best action, disregarding actually selected action (off-policy)
 595     a_next = agent_get_action_best (agent, s_next);
 596     agent_update_weights (agent, reward, s_next, a_next);
 597     if (agent_decide_exploration (agent))
 598     {
 599       a_next = agent_get_action_explore (agent, s_next);
 600       agent_modify_eligibility (agent, RIL_E_ZERO);
 601     }
 602     else
 603     {
 604       a_next = agent_get_action_best (agent, s_next);
 605       agent_modify_eligibility (agent, RIL_E_SET);
 606     }
 607     break;
 608   }
 609
 610   GNUNET_assert(-1 != a_next);
 611
 612   agent_modify_eligibility (agent, RIL_E_ACCUMULATE);
 613
 614   envi_do_action (agent->envi, agent, a_next);
 615
 616   GNUNET_free(agent->s_old);
 617   agent->s_old = s_next;
 618   agent->a_old = a_next;
 619
 620   agent->step_count += 1;
 621 }
 622
 623 /**
 624  * Cycles through all agents and lets the active ones do a step. Schedules the next step.
 625  * @param solver the solver handle
 626  * @param tc task context for the scheduler
 627  */
 628 static void
 629 ril_periodic_step (void *cls, const struct GNUNET_SCHEDULER_TaskContext *tc)
 630 {
 631   struct GAS_RIL_Handle *solver = cls;
 632   struct RIL_Peer_Agent *cur;
 633
 634   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n",
 635       solver->step_count);
 636
 637   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 638   {
 639     if (cur->active)
 640     {
 641       agent_step (cur);
 642     }
 643   }
 644
 645   solver->step_count += 1;
 646   solver->next_step = GNUNET_SCHEDULER_add_delayed (solver->step_time,
 647       &ril_periodic_step, solver);
 648 }
 649
 650 /**
 651  * Initialize an agent without addresses and its knowledge base
 652  * @param s ril solver
 653  * @param peer the one in question
 654  * @return handle to the new agent
 655  */
 656 static struct RIL_Peer_Agent *
 657 agent_init (void *s, const struct GNUNET_PeerIdentity *peer)
 658 {
 659   int i;
 660   struct GAS_RIL_Handle * solver = s;
 661   struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
 662
 663   agent->envi = solver;
 664   agent->peer = *peer;
 665   agent->step_count = 0;
 666   agent->active = GNUNET_NO;
 667   agent->s_old = envi_get_state (solver);
 668   agent->n = RIL_ACTION_TYPE_NUM;
 669   agent->m = solver->networks_count * 4;
 670   agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n);
 671   for (i = 0; i < agent->n; i++)
 672   {
 673     (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 674   }
 675   agent->a_old = -1;
 676   agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 677   agent_modify_eligibility (agent, RIL_E_ZERO);
 678
 679   GNUNET_CONTAINER_DLL_insert_tail(solver->agents_head, solver->agents_tail,
 680       agent);
 681
 682   return agent;
 683 }
 684
 685 /**
 686  * Deallocate agent
 687  * @param s solver handle
 688  * @param agent the agent to retire
 689  */
 690 static void
 691 agent_die (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 692 {
 693   int i;
 694
 695   for (i = 0; i < agent->n; i++)
 696   {
 697     GNUNET_free((agent->W)[i]);
 698   }
 699   GNUNET_free(agent->W);
 700   GNUNET_free(agent->e);
 701   GNUNET_free(agent->s_old);
 702 }
 703
 704 /**
 705  * Counts the (active) agents
 706  * @param solver solver handle
 707  * @param active_only whether only active agents should be counted
 708  * @return number of agents
 709  */
 710 static int
 711 ril_count_agents (struct GAS_RIL_Handle *solver, int active_only)
 712 {
 713   int c;
 714   struct RIL_Peer_Agent *cur;
 715
 716   c = 0;
 717   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 718   {
 719     if ((!active_only) || (active_only && cur->active))
 720     {
 721       c += 1;
 722     }
 723   }
 724   return c;
 725 }
 726
 727 /**
 728  * Returns the agent for a peer
 729  * @param s solver handle
 730  * @param peer identity of the peer
 731  * @return agent
 732  */
 733 static struct RIL_Peer_Agent *
 734 ril_get_agent (struct GAS_RIL_Handle *solver,
 735     const struct GNUNET_PeerIdentity *peer)
 736 {
 737   struct RIL_Peer_Agent *cur;
 738
 739   for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 740   {
 741     if (0 == GNUNET_CRYPTO_hash_cmp (&peer->hashPubKey, &cur->peer.hashPubKey))
 742     {
 743       return cur;
 744     }
 745   }
 746
 747   return agent_init (solver, peer);
 748 }
 749
 750 /**
 751  * Iterator, which allocates one agent per peer
 752  *
 753  * @param cls solver
 754  * @param key peer identity
 755  * @param value address
 756  * @return whether iterator should continue
 757  */
 758 static int
 759 ril_init_agents_it (void *cls, const struct GNUNET_HashCode *key, void *value)
 760 {
 761   struct GAS_RIL_Handle *solver = cls;
 762   struct ATS_Address *address = value;
 763   struct RIL_Peer_Agent *agent;
 764   uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
 765
 766   agent = ril_get_agent (solver, &address->peer);
 767
 768   GNUNET_assert(NULL != agent);
 769
 770   if (NULL == agent->address)
 771   {
 772     agent->address = address;
 773     agent->address->active = GNUNET_YES;
 774     agent->bw_in = min_bw;
 775     agent->address->assigned_bw_in.value__ = htonl (min_bw);
 776     agent->bw_out = min_bw;
 777     agent->address->assigned_bw_out.value__ = htonl (min_bw);
 778   }
 779
 780   return GNUNET_YES;
 781 }
 782
 783 /**
 784  * Lookup network struct by type
 785  *
 786  * @param s the solver handle
 787  * @param type the network type
 788  * @return the network struct
 789  */
 790 static struct RIL_Network *
 791 ril_get_network (struct GAS_RIL_Handle *s, uint32_t type)
 792 {
 793   int i;
 794   for (i = 0; i < s->networks_count; i++)
 795   {
 796     if (s->network_entries[i].type == type) {
 797       return &s->network_entries[i];
 798     }
 799   }
 800   return NULL;
 801 }
 802
 803 /**
 804  *  Solver API functions
 805  *  ---------------------------
 806  */
 807
 808 /**
 809  * Changes the preferences for a peer in the problem
 810  *
 811  * @param solver the solver handle
 812  * @param peer the peer to change the preference for
 813  * @param kind the kind to change the preference
 814  * @param pref_rel the normalized preference value for this kind over all clients
 815  */
 816 void
 817 GAS_ril_address_change_preference (void *s,
 818     const struct GNUNET_PeerIdentity *peer,
 819     enum GNUNET_ATS_PreferenceKind kind,
 820     double pref_rel)
 821 {
 822   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
 823       "API_address_change_preference() Preference `%s' for peer `%s' changed to %.2f \n",
 824       GNUNET_ATS_print_preference_type (kind), GNUNET_i2s (peer), pref_rel);
 825   /*
 826    * Nothing to do here. Preferences are considered during reward calculation.
 827    */
 828 }
 829
 830 /**
 831  * Init the reinforcement learning problem solver
 832  *
 833  * Quotas:
 834  * network[i] contains the network type as type GNUNET_ATS_NetworkType[i]
 835  * out_quota[i] contains outbound quota for network type i
 836  * in_quota[i] contains inbound quota for network type i
 837  *
 838  * Example
 839  * network = {GNUNET_ATS_NET_UNSPECIFIED, GNUNET_ATS_NET_LOOPBACK, GNUNET_ATS_NET_LAN, GNUNET_ATS_NET_WAN, GNUNET_ATS_NET_WLAN}
 840  * network[2]   == GNUNET_ATS_NET_LAN
 841  * out_quota[2] == 65353
 842  * in_quota[2]  == 65353
 843  *
 844  * @param cfg configuration handle
 845  * @param stats the GNUNET_STATISTICS handle
 846  * @param network array of GNUNET_ATS_NetworkType with length dest_length
 847  * @param addresses hashmap containing all addresses
 848  * @param out_quota array of outbound quotas
 849  * @param in_quota array of outbound quota
 850  * @param dest_length array length for quota arrays
 851  * @param bw_changed_cb callback for changed bandwidth amounts
 852  * @param bw_changed_cb_cls cls for callback
 853  * @param get_preference callback to get relative preferences for a peer
 854  * @param get_preference_cls cls for callback to get relative preferences
 855  * @param get_properties_cls for callback to get relative properties
 856  * @param get_properties_cls cls for callback to get relative properties
 857  * @return handle for the solver on success, NULL on fail
 858  */
 859 void *
 860 GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg,
 861     const struct GNUNET_STATISTICS_Handle *stats,
 862     const struct GNUNET_CONTAINER_MultiHashMap *addresses,
 863     int *network,
 864     unsigned long long *out_quota,
 865     unsigned long long *in_quota,
 866     int dest_length,
 867     GAS_bandwidth_changed_cb bw_changed_cb,
 868     void *bw_changed_cb_cls,
 869     GAS_get_preferences get_preference,
 870     void *get_preference_cls,
 871     GAS_get_properties get_properties,
 872     void *get_properties_cls)
 873 {
 874   int c;
 875   unsigned long long tmp;
 876   char *string;
 877   struct RIL_Network * cur;
 878   struct GAS_RIL_Handle *solver = GNUNET_malloc (sizeof (struct GAS_RIL_Handle));
 879
 880   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_init() Initializing RIL solver\n");
 881
 882   GNUNET_assert(NULL != cfg);
 883   GNUNET_assert(NULL != stats);
 884   GNUNET_assert(NULL != network);
 885   GNUNET_assert(NULL != bw_changed_cb);
 886   GNUNET_assert(NULL != get_preference);
 887   GNUNET_assert(NULL != get_properties);
 888
 889   if (GNUNET_OK
 890       != GNUNET_CONFIGURATION_get_value_time (cfg, "ats", "RIL_STEP_TIME",
 891           &solver->step_time))
 892   {
 893     solver->step_time = RIL_DEFAULT_STEP_TIME;
 894   }
 895   if (GNUNET_OK
 896       != GNUNET_CONFIGURATION_get_value_string (cfg, "ats", "RIL_ALGORITHM",
 897           &string))
 898   {
 899     if (0 == strcmp (string, "SARSA"))
 900     {
 901       solver->parameters.algorithm = RIL_ALGO_SARSA;
 902     }
 903   }
 904   else
 905   {
 906     solver->parameters.algorithm = RIL_DEFAULT_ALGORITHM;
 907   }
 908   if (GNUNET_OK
 909       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats", "RIL_DISCOUNT_FACTOR",
 910           &tmp))
 911   {
 912     solver->parameters.gamma = (double) tmp / 100;
 913     ;
 914   }
 915   else
 916   {
 917     solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR;
 918   }
 919   if (GNUNET_OK
 920       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats",
 921           "RIL_GRADIENT_STEP_SIZE", &tmp))
 922   {
 923     solver->parameters.alpha = (double) tmp / 100;
 924     ;
 925   }
 926   else
 927   {
 928     solver->parameters.alpha = RIL_DEFAULT_GRADIENT_STEP_SIZE;
 929   }
 930   if (GNUNET_OK
 931       == GNUNET_CONFIGURATION_get_value_size (cfg, "ats", "RIL_TRACE_DECAY",
 932           &tmp))
 933   {
 934     solver->parameters.lambda = (double) tmp / 100;
 935     ;
 936   }
 937   else
 938   {
 939     solver->parameters.lambda = RIL_DEFAULT_TRACE_DECAY;
 940   }
 941
 942   solver->stats = (struct GNUNET_STATISTICS_Handle *) stats;
 943   solver->callbacks = GNUNET_malloc (sizeof (struct RIL_Callbacks));
 944   solver->callbacks->bw_changed = bw_changed_cb;
 945   solver->callbacks->bw_changed_cls = bw_changed_cb_cls;
 946   solver->callbacks->get_preferences = get_preference;
 947   solver->callbacks->get_preferences_cls = get_preference_cls;
 948   solver->callbacks->get_properties = get_properties;
 949   solver->callbacks->get_properties_cls = get_properties_cls;
 950   solver->networks_count = dest_length;
 951   solver->network_entries =
 952       GNUNET_malloc (dest_length * sizeof (struct RIL_Network));
 953   solver->bulk_lock = GNUNET_NO;
 954   solver->addresses = addresses;
 955   solver->step_count = 0;
 956
 957   for (c = 0; c < dest_length; c++)
 958   {
 959     cur = &solver->network_entries[c];
 960     cur->type = network[c];
 961     cur->bw_in_available = in_quota[c];
 962     cur->bw_in_assigned = 0;
 963     cur->bw_out_available = out_quota[c];
 964     cur->bw_out_assigned = 0;
 965   }
 966
 967   c = GNUNET_CONTAINER_multihashmap_iterate (addresses, &ril_init_agents_it,
 968       solver);
 969
 970   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
 971       "API_init() Solving ATS for %d addresses and %d peers\n", c,
 972       ril_count_agents(solver, GNUNET_NO));
 973
 974   solver->next_step = GNUNET_SCHEDULER_add_delayed (
 975       GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (),
 976           1000), &ril_periodic_step, solver);
 977
 978   return solver;
 979 }
 980
 981 /**
 982  * Shutdown the reinforcement learning problem solver
 983  *
 984  * @param solver the respective handle to shutdown
 985  */
 986 void
 987 GAS_ril_done (void * solver)
 988 {
 989   struct GAS_RIL_Handle *s = solver;
 990   struct RIL_Peer_Agent *cur_agent;
 991   struct RIL_Peer_Agent *next_agent;
 992
 993   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_done() Shutting down RIL solver\n");
 994
 995   cur_agent = s->agents_head;
 996   while (NULL != cur_agent)
 997   {
 998     next_agent = cur_agent->next;
 999     GNUNET_CONTAINER_DLL_remove(s->agents_head, s->agents_tail, cur_agent);
1000     agent_die (s, cur_agent);
1001     cur_agent = next_agent;
1002   }
1003
1004   GNUNET_SCHEDULER_cancel (s->next_step);
1005   GNUNET_free(s->callbacks);
1006   GNUNET_free(s->network_entries);
1007   GNUNET_free(s);
1008 }
1009
1010 /**
1011  * Add a single address within a network to the solver
1012  *
1013  * @param solver the solver Handle
1014  * @param address the address to add
1015  * @param network network type of this address
1016  */
1017 void
1018 GAS_ril_address_add (void *solver,
1019     struct ATS_Address *address,
1020     uint32_t network)
1021 {
1022   struct GAS_RIL_Handle *s = solver;
1023   //TODO! implement solver address add
1024   /*
1025    * if (new peer)
1026    *     initialize new agent
1027    * Add address
1028    * increase state vector
1029    * knowledge matrix
1030    * and action vector
1031    */
1032
1033   /*
1034    * reiterate all addresses, create new agent if necessary and give the agent the address
1035    */
1036   GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it,
1037       solver);
1038
1039   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1040       "API_address_add() Added %s address for peer '%s'\n", address->plugin,
1041       GNUNET_i2s (&address->peer));
1042 }
1043
1044 /**
1045  * Remove an address from the solver
1046  *
1047  * @param solver the solver handle
1048  * @param address the address to remove
1049  * @param session_only delete only session not whole address
1050  */
1051 void
1052 GAS_ril_address_delete (void *solver,
1053     struct ATS_Address *address,
1054     int session_only)
1055 {
1056   //TODO! implement solver address delete
1057   //TODO! delete session only
1058   /*
1059    * remove address
1060    * if (last address of peer)
1061    *     remove agent
1062    * else
1063    *     decrease state vector
1064    *     decrease knowledge matrix
1065    *     decrease action vector
1066    */
1067   struct GAS_RIL_Handle *s = solver;
1068   struct RIL_Peer_Agent *agent;
1069
1070   agent = ril_get_agent (s, &address->peer);
1071
1072   if (0 == memcmp (agent->address->addr, address->addr, address->addr_len)) //if used address deleted
1073   {
1074     agent->address = NULL; //delete address
1075     GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it,
1076         solver); //put another address
1077     if (NULL == agent->address) //no other address available
1078     {
1079       agent->active = GNUNET_NO;
1080     }
1081   }
1082
1083   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1084       "API_address_delete() Deleted %s%s address for peer '%s'\n",
1085       session_only ? "session for " : "", address->plugin,
1086       GNUNET_i2s (&address->peer));
1087 }
1088
1089 /**
1090  * Transport properties for this address have changed
1091  *
1092  * @param solver solver handle
1093  * @param address the address
1094  * @param type the ATSI type in HBO
1095  * @param abs_value the absolute value of the property
1096  * @param rel_value the normalized value
1097  */
1098 void
1099 GAS_ril_address_property_changed (void *solver,
1100     struct ATS_Address *address,
1101     uint32_t type,
1102     uint32_t abs_value,
1103     double rel_value)
1104 {
1105   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1106       "API_address_property_changed() Property `%s' for peer `%s' address %p changed "
1107           "to %.2f \n", GNUNET_ATS_print_property_type (type),
1108       GNUNET_i2s (&address->peer), address, rel_value);
1109   /*
1110    * Nothing to do here, properties are considered in every reward calculation
1111    */
1112 }
1113
1114 /**
1115  * Transport session for this address has changed
1116  *
1117  * NOTE: values in addresses are already updated
1118  *
1119  * @param solver solver handle
1120  * @param address the address
1121  * @param cur_session the current session
1122  * @param new_session the new session
1123  */
1124 void
1125 GAS_ril_address_session_changed (void *solver,
1126     struct ATS_Address *address,
1127     uint32_t cur_session,
1128     uint32_t new_session)
1129 {
1130   //TODO? consider session changed in solver behaviour
1131   /*
1132    * Potentially add session activity as a feature in state vector
1133    */
1134   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_address_session_changed()\n");
1135 }
1136
1137 /**
1138  * Usage for this address has changed
1139  *
1140  * NOTE: values in addresses are already updated
1141  *
1142  * @param solver solver handle
1143  * @param address the address
1144  * @param in_use usage state
1145  */
1146 void
1147 GAS_ril_address_inuse_changed (void *solver,
1148     struct ATS_Address *address,
1149     int in_use)
1150 {
1151   //TODO! consider address_inuse_changed according to matthias' email
1152   /**
1153    * See matthias' email
1154    */
1155   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1156       "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n",
1157       address->plugin, GNUNET_i2s (&address->peer),
1158       (GNUNET_YES == in_use) ? "USED" : "UNUSED");
1159 }
1160
1161 /**
1162  * Network scope for this address has changed
1163  *
1164  * NOTE: values in addresses are already updated
1165  *
1166  * @param solver solver handle
1167  * @param address the address
1168  * @param current_network the current network
1169  * @param new_network the new network
1170  */
1171 void
1172 GAS_ril_address_change_network (void *solver,
1173     struct ATS_Address *address,
1174     uint32_t current_network,
1175     uint32_t new_network)
1176 {
1177   struct GAS_RIL_Handle *s = solver;
1178   struct RIL_Peer_Agent *agent;
1179   struct RIL_Network *net;
1180
1181   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1182       "API_address_change_network() Network type changed, moving "
1183           "%s address of peer %s from `%s' to `%s'\n",
1184       (GNUNET_YES == address->active) ? "active" : "inactive",
1185       GNUNET_i2s (&address->peer),
1186       GNUNET_ATS_print_network_type (current_network),
1187       GNUNET_ATS_print_network_type (new_network));
1188
1189   agent = ril_get_agent (s, &address->peer);
1190
1191   if (address->active)
1192   {
1193     //remove from old network
1194     net = ril_get_network (s, current_network);
1195     net->bw_in_assigned -= agent->bw_in;
1196     net->bw_out_assigned -= agent->bw_out;
1197
1198     //add to new network
1199     net = ril_get_network (s, new_network);
1200     net->bw_in_assigned += agent->bw_in;
1201     net->bw_out_assigned += agent->bw_out;
1202
1203     GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1204         "API_address_change_network() Moved %d inbound and %d "
1205             "outbound\n", agent->bw_in, agent->bw_out);
1206   }
1207 }
1208
1209 /**
1210  * Get application feedback for a peer
1211  *
1212  * @param solver the solver handle
1213  * @param application the application
1214  * @param peer the peer to change the preference for
1215  * @param scope the time interval for this feedback: [now - scope .. now]
1216  * @param kind the kind to change the preference
1217  * @param score the score
1218  */
1219 void
1220 GAS_ril_address_preference_feedback (void *solver,
1221     void *application,
1222     const struct GNUNET_PeerIdentity *peer,
1223     const struct GNUNET_TIME_Relative scope,
1224     enum GNUNET_ATS_PreferenceKind kind,
1225     double score)
1226 {
1227   //TODO! collect reward until next reward calculation
1228   //TODO! Find out application
1229   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1230       "API_address_preference_feedback() Peer '%s' got a feedback of %+.3f from application %s for "
1231           "preference %s for %d seconds\n", GNUNET_i2s (peer), "UNKNOWN",
1232       GNUNET_ATS_print_preference_type (kind), scope.rel_value_us / 1000000);
1233 }
1234
1235 /**
1236  * Start a bulk operation
1237  *
1238  * @param solver the solver
1239  */
1240 void
1241 GAS_ril_bulk_start (void *solver)
1242 {
1243   //TODO? consideration: keep bulk counter and stop agents during bulk
1244   /*
1245    * bulk counter up, but not really relevant, because there is no complete calculation of the
1246    * bandwidth assignment triggered anyway. Therefore, changes to addresses can come and go as
1247    * they want. Consideration: Step-pause during bulk-start-stop period...
1248    */
1249   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_bulk_start()\n");
1250 }
1251
1252 /**
1253  * Bulk operation done
1254  */
1255 void
1256 GAS_ril_bulk_stop (void *solver)
1257 {
1258   //TODO? consideration: keep bulk counter and stop agents during bulk
1259   /*
1260    * bulk counter down, see bulk_start()
1261    */
1262   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG, "API_bulk_stop()\n");
1263 }
1264
1265 /**
1266  * Get the preferred address for a specific peer
1267  *
1268  * @param solver the solver handle
1269  * @param peer the identity of the peer
1270  */
1271 const struct ATS_Address *
1272 GAS_ril_get_preferred_address (void *solver,
1273     const struct GNUNET_PeerIdentity *peer)
1274 {
1275   /*
1276    * activate agent, return currently chosen address
1277    */
1278   struct GAS_RIL_Handle *s = solver;
1279   struct RIL_Peer_Agent *agent;
1280
1281   agent = ril_get_agent (s, peer);
1282   agent->active = GNUNET_YES;
1283
1284   GNUNET_assert(NULL != agent->address);
1285
1286   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1287       "API_get_preferred_address() Activated agent for peer '%s' with %s address\n",
1288       GNUNET_i2s (peer), agent->address->plugin);
1289
1290   return agent->address;
1291 }
1292
1293 /**
1294  * Stop notifying about address and bandwidth changes for this peer
1295  *
1296  * @param solver the solver handle
1297  * @param peer the peer
1298  */
1299 void
1300 GAS_ril_stop_get_preferred_address (void *solver,
1301     const struct GNUNET_PeerIdentity *peer)
1302 {
1303   struct GAS_RIL_Handle *s = solver;
1304   struct RIL_Peer_Agent *agent;
1305
1306   agent = ril_get_agent (s, peer);
1307   agent->active = GNUNET_NO;
1308
1309   GNUNET_log(GNUNET_ERROR_TYPE_DEBUG,
1310       "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n",
1311       GNUNET_i2s (peer), agent->address->plugin);
1312 }
1313
1314 /* end of gnunet-service-ats-solver_ril.c */