src/ats/gnunet-service-ats-solver_ril.c

   1 /*
   2      This file is part of GNUnet.
   3      (C) 2011 Christian Grothoff (and other contributing authors)
   4
   5      GNUnet is free software; you can redistribute it and/or modify
   6      it under the terms of the GNU General Public License as published
   7      by the Free Software Foundation; either version 3, or (at your
   8      option) any later version.
   9
  10      GNUnet is distributed in the hope that it will be useful, but
  11      WITHOUT ANY WARRANTY; without even the implied warranty of
  12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13      General Public License for more details.
  14
  15      You should have received a copy of the GNU General Public License
  16      along with GNUnet; see the file COPYING.  If not, write to the
  17      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18      Boston, MA 02111-1307, USA.
  19 */
  20
  21 /**
  22  * @file ats/gnunet-service-ats-solver_ril.c
  23  * @brief ATS reinforcement learning solver
  24  * @author Fabian Oehlmann
  25  * @author Matthias Wachs
  26  */
  27 #include "platform.h"
  28 #include "float.h"
  29 #include "gnunet_util_lib.h"
  30 #include "gnunet-service-ats_addresses.h"
  31 #include "gnunet_statistics_service.h"
  32
  33 #define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
  34 #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
  35 #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
  36 #define RIL_DEFAULT_TRACE_DECAY 0.6
  37 #define RIL_EXPLORE_RATIO 0.1
  38
  39 /**
  40  * ATS reinforcement learning solver
  41  *
  42  * General description
  43  */
  44
  45 enum RIL_Action_Type
  46 {
  47         RIL_ACTION_BW_IN_DBL = 0,
  48         RIL_ACTION_BW_OUT_DBL = 1,
  49         RIL_ACTION_BW_IN_HLV = 2,
  50         RIL_ACTION_BW_OUT_HLV = 3,
  51         RIL_ACTION_TYPE_NUM = 4
  52 };
  53 //TODO! add the rest of the actions
  54
  55 enum RIL_Algorithm
  56 {
  57         RIL_ALGO_SARSA,
  58         RIL_ALGO_Q
  59 };
  60
  61 enum RIL_E_Modification
  62 {
  63         RIL_E_SET,
  64         RIL_E_ZERO,
  65         RIL_E_ACCUMULATE,
  66         RIL_E_REPLACE
  67 };
  68
  69 /**
  70  * Global learning parameters
  71  */
  72 struct RIL_Learning_Parameters
  73 {
  74         /**
  75          * The TD-algorithm to use
  76          */
  77         enum RIL_Algorithm algorithm;
  78
  79         /**
  80          * Learning discount factor in the TD-update
  81          */
  82         float gamma;
  83
  84         /**
  85          * Gradient-descent step-size
  86          */
  87         float alpha;
  88
  89         /**
  90          * Trace-decay factor for eligibility traces
  91          */
  92         float lambda;
  93 };
  94
  95 struct RIL_Peer_Agent
  96 {
  97         /**
  98          * Next agent in solver's linked list
  99          */
 100         struct RIL_Peer_Agent *next;
 101
 102         /**
 103          * Previous agent in solver's linked list
 104          */
 105         struct RIL_Peer_Agent *prev;
 106
 107         /**
 108          * Environment handle
 109          */
 110         struct GAS_RIL_Handle *envi;
 111
 112         /**
 113          * Peer ID
 114          */
 115         struct GNUNET_PeerIdentity peer;
 116
 117         /**
 118          * Whether the agent is active or not
 119          */
 120         int active;
 121
 122         /**
 123         * Number of performed time-steps
 124         */
 125         unsigned long long step_count;
 126
 127         /**
 128          * Experience matrix W
 129          */
 130         double ** W;
 131
 132         /**
 133          * Number of rows of W / Number of state-vector features
 134          */
 135         int m;
 136
 137         /**
 138          * Number of columns of W / Number of actions
 139          */
 140         int n;
 141
 142         /**
 143          * Last perceived state feature vector
 144          */
 145         double * s_old;
 146
 147         /**
 148          * Last chosen action
 149          */
 150         int a_old;
 151
 152         /**
 153          * Eligibility trace vector
 154          */
 155         double * e;
 156
 157         /**
 158          * Address in use
 159          */
 160         struct ATS_Address * address;
 161
 162         /**
 163          * Inbound bandwidth assigned by the agent
 164          */
 165         unsigned long long bw_in;
 166
 167         /**
 168          * Outbound bandwidth assigned by the agent
 169          */
 170         unsigned long long bw_out;
 171 };
 172
 173 struct RIL_Network
 174 {
 175           /**
 176            * ATS network type
 177            */
 178           enum GNUNET_ATS_Network_Type type;
 179
 180           /**
 181            * Total available inbound bandwidth
 182            */
 183           unsigned long long bw_in_available;
 184
 185           /**
 186            * Total assigned outbound bandwidth
 187            */
 188           unsigned long long bw_in_assigned;
 189
 190           /**
 191            * Total available outbound bandwidth
 192            */
 193           unsigned long long bw_out_available;
 194
 195           /**
 196            * Total assigned outbound bandwidth
 197            */
 198           unsigned long long bw_out_assigned;
 199 };
 200
 201 struct RIL_Callbacks
 202 {
 203           /**
 204            * Bandwidth changed callback
 205            */
 206           GAS_bandwidth_changed_cb bw_changed;
 207
 208           /**
 209            * Bandwidth changed callback cls
 210            */
 211           void *bw_changed_cls;
 212
 213           /**
 214            * ATS function to get preferences
 215            */
 216           GAS_get_preferences get_preferences;
 217
 218           /**
 219            * Closure for ATS function to get preferences
 220            */
 221           void *get_preferences_cls;
 222
 223           /**
 224            * ATS function to get properties
 225            */
 226           GAS_get_properties get_properties;
 227
 228           /**
 229            * Closure for ATS function to get properties
 230            */
 231           void *get_properties_cls;
 232 };
 233
 234 /**
 235  * A handle for the reinforcement learning solver
 236  */
 237 struct GAS_RIL_Handle
 238 {
 239         /**
 240         * Statistics handle
 241         */
 242         struct GNUNET_STATISTICS_Handle *stats;
 243
 244         /**
 245         * Hashmap containing all valid addresses
 246         */
 247         const struct GNUNET_CONTAINER_MultiHashMap *addresses;
 248
 249         /**
 250         * Callbacks for the solver
 251         */
 252         struct RIL_Callbacks *callbacks;
 253
 254         /**
 255         * Bulk lock
 256         */
 257         int bulk_lock;
 258
 259         /**
 260         * Number of changes while solver was locked
 261         */
 262         int bulk_requests;
 263
 264         /**
 265         * Number of performed time-steps
 266         */
 267         unsigned long long step_count;
 268
 269         /**
 270         * Interval time between steps in milliseconds //TODO? put in agent
 271         */
 272         struct GNUNET_TIME_Relative step_time;
 273
 274         /**
 275         * Task identifier of the next time-step to be executed //TODO? put in agent
 276         */
 277         GNUNET_SCHEDULER_TaskIdentifier next_step;
 278
 279         /**
 280         * Learning parameters
 281         */
 282         struct RIL_Learning_Parameters parameters;
 283
 284         /**
 285         * Array of networks with global assignment state
 286         */
 287         struct RIL_Network * network_entries;
 288
 289         /**
 290         * Networks count
 291         */
 292         unsigned int networks_count;
 293
 294         /**
 295         * List of active peer-agents
 296         */
 297         struct RIL_Peer_Agent * agents_head;
 298         struct RIL_Peer_Agent * agents_tail;
 299 };
 300
 301 /**
 302  *  Private functions
 303  *  ---------------------------
 304  */
 305
 306 /**
 307  * Estimate the current action-value for state s and action a
 308  * @param agent agent performing the estimation
 309  * @param state s
 310  * @param action a
 311  * @return estimation value
 312  */
 313 static double
 314 agent_estimate_q (struct RIL_Peer_Agent *agent,
 315                 double *state,
 316                 int action)
 317 {
 318         int i;
 319         double result = 0;
 320
 321         for (i = 0; i < agent->m; i++)
 322         {
 323                 result += state[i] * (agent->W)[agent->m][action];
 324         }
 325
 326         return result;
 327 }
 328
 329 /**
 330  * Decide whether to do exploration (i.e. taking a new action) or exploitation (i.e. taking the
 331  * currently estimated best action) in the current step
 332  * @param agent agent performing the step
 333  * @return yes, if exploring
 334  */
 335 static int
 336 agent_decide_exploration (struct RIL_Peer_Agent *agent)
 337 {
 338         double r = (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX;
 339
 340         if (r < RIL_EXPLORE_RATIO)
 341         {
 342                 return GNUNET_YES;
 343         }
 344         return GNUNET_NO;
 345 }
 346
 347 /**
 348  * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
 349  * most reward in the future)
 350  * @param agent agent performing the calculation
 351  * @param state the state from which to take the action
 352  * @return the action promising most future reward
 353  */
 354 static int
 355 agent_get_action_best (struct RIL_Peer_Agent *agent,
 356                 double *state)
 357 {
 358         int i;
 359         int max_i = -1;
 360         double cur_q;
 361         double max_q = DBL_MIN;
 362
 363         for (i = 0; i < agent->n; i++)
 364         {
 365                 cur_q = agent_estimate_q (agent, state, i);
 366                 if (cur_q > max_q)
 367                 {
 368                         max_q = cur_q;
 369                         max_i = i;
 370                 }
 371         }
 372
 373         GNUNET_assert(-1 != max_i);
 374
 375         return max_i;
 376 }
 377
 378 /**
 379  * Gets any action, to explore the action space from that state
 380  * @param agent agent performing the calculation
 381  * @param state the state from which to take the action
 382  * @return any action
 383  */
 384 static int
 385 agent_get_action_explore (struct RIL_Peer_Agent *agent,
 386                 double *state)
 387 {
 388         return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
 389 }
 390
 391 /**
 392  * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
 393  * @param agent the agent performing the update
 394  * @param reward the reward received for the last action
 395  * @param s_next the new state, the last step got the agent into
 396  * @param a_prime the new
 397  */
 398 static void
 399 agent_update_weights (struct RIL_Peer_Agent *agent,
 400                 double reward,
 401                 double *s_next,
 402                 int a_prime)
 403 {
 404         int i;
 405         double delta;
 406         double *theta = (agent->W)[agent->a_old];
 407
 408         delta = reward + agent_estimate_q (agent, s_next, a_prime) -
 409                         agent_estimate_q (agent, agent->s_old, agent->a_old);
 410         for (i = 0; i < agent->m; i++)
 411         {
 412                 theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i];
 413         }
 414 }
 415
 416 /**
 417  * Changes the eligibility trace vector e in various manners:
 418  * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
 419  * RIL_E_REPLACE - resets each component to 1 as in replacing traces
 420  * RIL_E_SET - multiplies e with gamma and lambda as in the update rule
 421  * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
 422  * @param agent
 423  * @param mod
 424  */
 425 static void
 426 agent_modify_eligibility (struct RIL_Peer_Agent *agent,
 427                 enum RIL_E_Modification mod)
 428 {
 429         int i;
 430         double *e = agent->e;
 431         double gamma = agent->envi->parameters.gamma;
 432         double lambda = agent->envi->parameters.lambda;
 433
 434         for (i = 0; i < agent->m; i++)
 435         {
 436                 switch (mod)
 437                 {
 438                         case RIL_E_ACCUMULATE:
 439                                 e[i] += 1;
 440                                 break;
 441                         case RIL_E_REPLACE:
 442                                 e[i] = 1;
 443                                 break;
 444                         case RIL_E_SET:
 445                                 e[i] = gamma * lambda;
 446                                 break;
 447                         case RIL_E_ZERO:
 448                                 e[i] = 0;
 449                                 break;
 450                 }
 451         }
 452 }
 453
 454 /**
 455  * Allocates a state vector and fills it with the features present
 456  * @param solver the solver handle
 457  * @return pointer to the state vector
 458  */
 459 static double *
 460 envi_get_state (struct GAS_RIL_Handle *solver)
 461 {
 462         int i;
 463         struct RIL_Network *net;
 464         double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
 465
 466         for (i = 0; i < solver->networks_count; i += 4)
 467         {
 468                 net = (&solver->network_entries)[i];
 469                 state[i]   = (double) net->bw_in_assigned;
 470                 state[i+1] = (double) net->bw_in_available;
 471                 state[i+2] = (double) net->bw_out_assigned;
 472                 state[i+3] = (double) net->bw_out_available;
 473         }
 474
 475         return state;
 476 }
 477
 478 /**
 479  * Gets the reward of the last performed step
 480  * @param solver solver handle
 481  * @return the reward
 482  */
 483 static double
 484 envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 485 {
 486         //TODO! implement reward calculation
 487
 488         return (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX;
 489 }
 490
 491 static void
 492 envi_action_bw_double (struct GAS_RIL_Handle *solver,
 493                 struct RIL_Peer_Agent *agent,
 494                 int direction_in)
 495 {
 496         if (direction_in)
 497         {
 498                 agent->bw_in *= 2;
 499                 agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 500                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 501         }
 502         else
 503         {
 504                 agent->bw_out *= 2;
 505                 agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 506                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 507         }
 508 }
 509
 510 static void
 511 envi_action_bw_halven (struct GAS_RIL_Handle *solver,
 512                 struct RIL_Peer_Agent *agent,
 513                 int direction_in)
 514 {
 515         if ((direction_in && 1 == agent->bw_in) ||
 516                         (!direction_in && 1 == agent->bw_out))
 517         {
 518                 return;
 519         }
 520         if (direction_in)
 521         {
 522                 agent->bw_in /= 2;
 523                 agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 524                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 525         }
 526         else
 527         {
 528                 agent->bw_out /= 2;
 529                 agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 530                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 531         }
 532 }
 533
 534 /**
 535  * Puts the action into effect
 536  * @param solver solver handle
 537  * @param action action to perform by the solver
 538  */
 539 static void
 540 envi_do_action (struct GAS_RIL_Handle *solver,
 541                 struct RIL_Peer_Agent *agent,
 542                 int action)
 543 {
 544         switch (action)
 545         {
 546                 case RIL_ACTION_BW_IN_DBL:
 547                         envi_action_bw_double (solver, agent, GNUNET_YES);
 548                         break;
 549                 case RIL_ACTION_BW_IN_HLV:
 550                         envi_action_bw_halven (solver, agent, GNUNET_YES);
 551                         break;
 552                 case RIL_ACTION_BW_OUT_DBL:
 553                         envi_action_bw_double (solver, agent, GNUNET_NO);
 554                         break;
 555                 case RIL_ACTION_BW_OUT_HLV:
 556                         envi_action_bw_halven (solver, agent, GNUNET_NO);
 557                         break;
 558         }
 559 }
 560
 561 /**
 562  * Performs one step of the Markov Decision Process. Other than in the literature the step starts
 563  * after having done the last action a_old. It observes the new state s_next and the reward
 564  * received. Then the coefficient update is done according to the SARSA or Q-learning method. The
 565  * next action is put into effect.
 566  * @param agent the agent performing the step
 567  */
 568 static void
 569 agent_step (struct RIL_Peer_Agent *agent)
 570 {
 571         int a_next = -1;
 572         double *s_next;
 573         double reward;
 574
 575         s_next = envi_get_state(agent->envi);
 576         reward = envi_get_reward(agent->envi, agent);
 577
 578         switch (agent->envi->parameters.algorithm)
 579         {
 580                 case RIL_ALGO_SARSA:
 581                         agent_modify_eligibility (agent, RIL_E_SET);
 582                         if (agent_decide_exploration (agent))
 583                         {
 584                                 a_next = agent_get_action_explore (agent, s_next);
 585                         }
 586                         else
 587                         {
 588                                 a_next = agent_get_action_best (agent, s_next);
 589                         }
 590                         //updates weights with selected action (on-policy)
 591                         agent_update_weights (agent, reward, s_next, a_next);
 592                         break;
 593
 594                 case RIL_ALGO_Q:
 595                         //updates weights with best action, disregarding actually selected action (off-policy)
 596                         a_next = agent_get_action_best (agent, s_next);
 597                         agent_update_weights (agent, reward, s_next, a_next);
 598                         if (agent_decide_exploration (agent))
 599                         {
 600                                 a_next = agent_get_action_explore (agent, s_next);
 601                                 agent_modify_eligibility(agent, RIL_E_ZERO);
 602                         }
 603                         else
 604                         {
 605                                 a_next = agent_get_action_best (agent, s_next);
 606                                 agent_modify_eligibility(agent, RIL_E_SET);
 607                         }
 608                         break;
 609         }
 610
 611         GNUNET_assert (-1 != a_next);
 612
 613         agent_modify_eligibility (agent, RIL_E_ACCUMULATE);
 614
 615         envi_do_action(agent->envi, agent, a_next);
 616
 617         GNUNET_free(agent->s_old);
 618         agent->s_old = s_next;
 619         agent->a_old = a_next;
 620
 621         agent->step_count += 1;
 622 }
 623
 624 /**
 625  * Cycles through all agents and lets the active ones do a step. Schedules the next step.
 626  * @param solver the solver handle
 627  * @param tc task context for the scheduler
 628  */
 629 static void
 630 ril_periodic_step (void *cls,
 631                                 const struct GNUNET_SCHEDULER_TaskContext *tc)
 632 {
 633         struct GAS_RIL_Handle *solver = cls;
 634         struct RIL_Peer_Agent *cur;
 635
 636         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count);
 637
 638         for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 639         {
 640                 if (cur->active)
 641                 {
 642                         agent_step (cur);
 643                 }
 644         }
 645
 646         solver->step_count += 1;
 647         solver->next_step = GNUNET_SCHEDULER_add_delayed (
 648                         solver->step_time,
 649                         &ril_periodic_step,
 650                         solver);
 651 }
 652
 653 /**
 654  * Initialize an agent without addresses and its knowledge base
 655  * @param s ril solver
 656  * @param peer the one in question
 657  * @return handle to the new agent
 658  */
 659 static struct RIL_Peer_Agent *
 660 agent_init (void *s,
 661                 const struct GNUNET_PeerIdentity *peer)
 662 {
 663         int i;
 664         struct GAS_RIL_Handle * solver = s;
 665         struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
 666
 667         agent->envi = solver;
 668         agent->peer = *peer;
 669         agent->step_count = 0;
 670         agent->active = GNUNET_NO;
 671         agent->s_old = NULL;
 672         agent->n = RIL_ACTION_TYPE_NUM;
 673         agent->m = solver->networks_count * 4;
 674         agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n);
 675         for (i = 0; i < agent->n; i++)
 676         {
 677                 (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 678         }
 679         agent->a_old = -1;
 680         agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 681         agent_modify_eligibility (agent, RIL_E_ZERO);
 682
 683         GNUNET_CONTAINER_DLL_insert_tail (solver->agents_head, solver->agents_tail, agent);
 684
 685         return agent;
 686 }
 687
 688 /**
 689  * Deallocate agent
 690  * @param s solver handle
 691  * @param agent the agent to retire
 692  */
 693 static void
 694 agent_die (struct GAS_RIL_Handle *solver,
 695                 struct RIL_Peer_Agent *agent)
 696 {
 697         int i;
 698
 699         for (i = 0; i < agent->n; i++)
 700         {
 701                 GNUNET_free((agent->W)[i]);
 702         }
 703         GNUNET_free(agent->W);
 704         GNUNET_free(agent->e);
 705         GNUNET_free(agent->s_old);
 706 }
 707
 708 /**
 709  * Counts the (active) agents
 710  * @param solver solver handle
 711  * @param active_only whether only active agents should be counted
 712  * @return number of agents
 713  */
 714 static int
 715 ril_count_agents (struct GAS_RIL_Handle *solver,
 716                 int active_only)
 717 {
 718         int c;
 719         struct RIL_Peer_Agent *cur;
 720
 721         c = 0;
 722         for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 723         {
 724                 if ((!active_only) || (active_only && cur->active))
 725                 {
 726                         c += 1;
 727                 }
 728         }
 729         return c;
 730 }
 731
 732 /**
 733  * Returns the agent for a peer
 734  * @param s solver handle
 735  * @param peer identity of the peer
 736  * @return agent
 737  */
 738 static struct RIL_Peer_Agent *
 739 ril_get_agent (struct GAS_RIL_Handle *solver,
 740                 const struct GNUNET_PeerIdentity *peer)
 741 {
 742         struct RIL_Peer_Agent *cur;
 743
 744         for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 745         {
 746                 if (0 == GNUNET_CRYPTO_hash_cmp (&peer->hashPubKey, &cur->peer.hashPubKey))
 747                 {
 748                         return cur;
 749                 }
 750         }
 751
 752         return agent_init (solver, peer);
 753 }
 754
 755 /**
 756  * Iterator, which allocates one agent per peer
 757  *
 758  * @param cls solver
 759  * @param key peer identity
 760  * @param value address
 761  * @return whether iterator should continue
 762  */
 763 static int
 764 ril_init_agents_it (void *cls,
 765                                 const struct GNUNET_HashCode *key,
 766                                 void *value)
 767 {
 768         struct GAS_RIL_Handle *solver = cls;
 769         struct ATS_Address *address = value;
 770         struct RIL_Peer_Agent *agent;
 771         uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
 772
 773         agent = ril_get_agent (solver, &address->peer);
 774
 775         GNUNET_assert (NULL != agent);
 776
 777         if (NULL == agent->address)
 778         {
 779                 agent->address = address;
 780                 agent->address->active = GNUNET_YES;
 781                 agent->bw_in = min_bw;
 782                 agent->address->assigned_bw_in.value__ = htonl (min_bw);
 783                 agent->bw_out = min_bw;
 784                 agent->address->assigned_bw_out.value__ = htonl (min_bw);
 785         }
 786
 787         return GNUNET_YES;
 788 }
 789
 790 /**
 791  * Lookup network struct by type
 792  *
 793  * @param s the solver handle
 794  * @param type the network type
 795  * @return the network struct
 796  */
 797 static struct RIL_Network *
 798 ril_get_network (struct GAS_RIL_Handle *s, uint32_t type)
 799 {
 800   int i;
 801   for (i = 0 ; i < s->networks_count; i++)
 802   {
 803       if (s->network_entries[i].type == type)
 804         return &s->network_entries[i];
 805
 806   }
 807   return NULL;
 808 }
 809
 810
 811 /**
 812  *  Solver API functions
 813  *  ---------------------------
 814  */
 815
 816 /**
 817  * Changes the preferences for a peer in the problem
 818  *
 819  * @param solver the solver handle
 820  * @param peer the peer to change the preference for
 821  * @param kind the kind to change the preference
 822  * @param pref_rel the normalized preference value for this kind over all clients
 823  */
 824 void
 825 GAS_ril_address_change_preference (void *s,
 826                 const struct GNUNET_PeerIdentity *peer,
 827                 enum GNUNET_ATS_PreferenceKind kind,
 828                 double pref_rel)
 829 {
 830           GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
 831                       "API_address_change_preference() Preference `%s' for peer `%s' changed to %.2f \n",
 832                       GNUNET_ATS_print_preference_type (kind),
 833                       GNUNET_i2s (peer),
 834                       pref_rel);
 835           /*
 836            * Nothing to do here. Preferences are considered during reward calculation.
 837            */
 838 }
 839
 840
 841 /**
 842  * Init the reinforcement learning problem solver
 843  *
 844  * Quotas:
 845  * network[i] contains the network type as type GNUNET_ATS_NetworkType[i]
 846  * out_quota[i] contains outbound quota for network type i
 847  * in_quota[i] contains inbound quota for network type i
 848  *
 849  * Example
 850  * network = {GNUNET_ATS_NET_UNSPECIFIED, GNUNET_ATS_NET_LOOPBACK, GNUNET_ATS_NET_LAN, GNUNET_ATS_NET_WAN, GNUNET_ATS_NET_WLAN}
 851  * network[2]   == GNUNET_ATS_NET_LAN
 852  * out_quota[2] == 65353
 853  * in_quota[2]  == 65353
 854  *
 855  * @param cfg configuration handle
 856  * @param stats the GNUNET_STATISTICS handle
 857  * @param network array of GNUNET_ATS_NetworkType with length dest_length
 858  * @param addresses hashmap containing all addresses
 859  * @param out_quota array of outbound quotas
 860  * @param in_quota array of outbound quota
 861  * @param dest_length array length for quota arrays
 862  * @param bw_changed_cb callback for changed bandwidth amounts
 863  * @param bw_changed_cb_cls cls for callback
 864  * @param get_preference callback to get relative preferences for a peer
 865  * @param get_preference_cls cls for callback to get relative preferences
 866  * @param get_properties_cls for callback to get relative properties
 867  * @param get_properties_cls cls for callback to get relative properties
 868  * @return handle for the solver on success, NULL on fail
 869  */
 870 void *
 871 GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg,
 872                 const struct GNUNET_STATISTICS_Handle *stats,
 873                 const struct GNUNET_CONTAINER_MultiHashMap *addresses,
 874                 int *network,
 875                 unsigned long long *out_quota,
 876                 unsigned long long *in_quota,
 877                 int dest_length,
 878                 GAS_bandwidth_changed_cb bw_changed_cb,
 879                 void *bw_changed_cb_cls,
 880                 GAS_get_preferences get_preference,
 881                 void *get_preference_cls,
 882                 GAS_get_properties get_properties,
 883                 void *get_properties_cls)
 884 {
 885         int c;
 886         unsigned long long tmp;
 887         struct RIL_Network * cur;
 888         struct GAS_RIL_Handle *solver = GNUNET_malloc (sizeof (struct GAS_RIL_Handle));
 889
 890         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_init() Initializing RIL solver\n");
 891
 892         GNUNET_assert (NULL != cfg);
 893         GNUNET_assert (NULL != stats);
 894         GNUNET_assert (NULL != network);
 895         GNUNET_assert (NULL != bw_changed_cb);
 896         GNUNET_assert (NULL != get_preference);
 897         GNUNET_assert (NULL != get_properties);
 898
 899         if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_time(cfg, "ats", "RIL_STEP_TIME", &solver->step_time))
 900         {
 901                 solver->step_time = RIL_DEFAULT_STEP_TIME;
 902         }
 903         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_DISCOUNT_FACTOR", &tmp))
 904         {
 905                 solver->parameters.gamma = (double) tmp / 100;;
 906         }
 907         else
 908         {
 909                 solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR;
 910         }
 911         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &tmp))
 912         {
 913                 solver->parameters.alpha = (double) tmp / 100;;
 914         }
 915         else
 916         {
 917                 solver->parameters.alpha = RIL_DEFAULT_GRADIENT_STEP_SIZE;
 918         }
 919         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_TRACE_DECAY", &tmp))
 920         {
 921                 solver->parameters.lambda = (double) tmp / 100;;
 922         }
 923         else
 924         {
 925                 solver->parameters.lambda = RIL_DEFAULT_TRACE_DECAY;
 926         }
 927
 928         solver->stats = (struct GNUNET_STATISTICS_Handle *) stats;
 929         solver->callbacks = GNUNET_malloc (sizeof (struct RIL_Callbacks));
 930         solver->callbacks->bw_changed = bw_changed_cb;
 931         solver->callbacks->bw_changed_cls = bw_changed_cb_cls;
 932         solver->callbacks->get_preferences = get_preference;
 933         solver->callbacks->get_preferences_cls = get_preference_cls;
 934         solver->callbacks->get_properties = get_properties;
 935         solver->callbacks->get_properties_cls = get_properties_cls;
 936         solver->networks_count = dest_length;
 937         solver->network_entries = GNUNET_malloc (dest_length * sizeof (struct RIL_Network));
 938         solver->bulk_lock = GNUNET_NO;
 939         solver->addresses = addresses;
 940         solver->step_count = 0;
 941
 942         for (c = 0; c < dest_length; c++)
 943         {
 944                 cur = &solver->network_entries[c];
 945                 cur->type = network[c];
 946                 cur->bw_in_available = in_quota[c];
 947                 cur->bw_in_assigned = 0;
 948                 cur->bw_out_available = out_quota[c];
 949                 cur->bw_out_assigned = 0;
 950         }
 951
 952         c = GNUNET_CONTAINER_multihashmap_iterate (addresses, &ril_init_agents_it, solver);
 953
 954         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_init() Solving ATS for %d addresses and %d peers\n",
 955                         c,
 956                         ril_count_agents(solver, GNUNET_NO));
 957
 958         solver->next_step = GNUNET_SCHEDULER_add_delayed (
 959                                 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
 960                                 &ril_periodic_step,
 961                                 solver);
 962
 963         return solver;
 964 }
 965
 966 /**
 967  * Shutdown the reinforcement learning problem solver
 968  *
 969  * @param solver the respective handle to shutdown
 970  */
 971 void
 972 GAS_ril_done (void * solver)
 973 {
 974         struct GAS_RIL_Handle *s = solver;
 975         struct RIL_Peer_Agent *cur_agent;
 976         struct RIL_Peer_Agent *next_agent;
 977
 978         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_done() Shutting down RIL solver\n");
 979
 980         cur_agent = s->agents_head;
 981         while (NULL != cur_agent)
 982         {
 983                 next_agent = cur_agent->next;
 984                 GNUNET_CONTAINER_DLL_remove (s->agents_head, s->agents_tail, cur_agent);
 985                 agent_die (s, cur_agent);
 986                 cur_agent = next_agent;
 987         }
 988
 989         GNUNET_SCHEDULER_cancel (s->next_step);
 990         GNUNET_free (s->callbacks);
 991         GNUNET_free (s->network_entries);
 992         GNUNET_free (s);
 993 }
 994
 995
 996 /**
 997  * Add a single address within a network to the solver
 998  *
 999  * @param solver the solver Handle
1000  * @param address the address to add
1001  * @param network network type of this address
1002  */
1003 void
1004 GAS_ril_address_add (void *solver,
1005                 struct ATS_Address *address,
1006                 uint32_t network)
1007 {
1008         struct GAS_RIL_Handle *s = solver;
1009         //TODO! implement solver address add
1010         /*
1011          * if (new peer)
1012          *     initialize new agent
1013          * Add address
1014          * increase state vector
1015          * knowledge matrix
1016          * and action vector
1017          */
1018
1019         /*
1020          * reiterate all addresses, create new agent if necessary and give the agent the address
1021          */
1022         GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it, solver);
1023
1024         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_address_add() Added %s address for peer '%s'\n",
1025                         address->plugin, GNUNET_i2s (&address->peer));
1026 }
1027
1028 /**
1029  * Remove an address from the solver
1030  *
1031  * @param solver the solver handle
1032  * @param address the address to remove
1033  * @param session_only delete only session not whole address
1034  */
1035 void
1036 GAS_ril_address_delete (void *solver,
1037                 struct ATS_Address *address,
1038                 int session_only)
1039 {
1040         //TODO! implement solver address delete
1041         //TODO! delete session only
1042         /*
1043          * remove address
1044          * if (last address of peer)
1045          *     remove agent
1046          * else
1047          *     decrease state vector
1048          *     decrease knowledge matrix
1049          *     decrease action vector
1050          */
1051         struct GAS_RIL_Handle *s = solver;
1052         struct RIL_Peer_Agent *agent;
1053
1054         agent = ril_get_agent(s, &address->peer);
1055
1056         if (0 == memcmp (agent->address->addr, address->addr, address->addr_len)) //if used address deleted
1057         {
1058                 agent->address = NULL; //delete address
1059                 GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it, solver); //put another address
1060                 if (NULL == agent->address) //no other address available
1061                 {
1062                         agent->active = GNUNET_NO;
1063                 }
1064         }
1065
1066         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1067                         "API_address_delete() Deleted %s%s address for peer '%s'\n",
1068                         session_only ? "session for " : "",
1069                         address->plugin,
1070                         GNUNET_i2s(&address->peer));
1071 }
1072
1073 /**
1074  * Transport properties for this address have changed
1075  *
1076  * @param solver solver handle
1077  * @param address the address
1078  * @param type the ATSI type in HBO
1079  * @param abs_value the absolute value of the property
1080  * @param rel_value the normalized value
1081  */
1082 void
1083 GAS_ril_address_property_changed (void *solver,
1084                 struct ATS_Address *address,
1085                 uint32_t type,
1086                 uint32_t abs_value,
1087                 double rel_value)
1088 {
1089           GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1090                       "API_address_property_changed() Property `%s' for peer `%s' address %p changed "
1091                       "to %.2f \n",
1092                       GNUNET_ATS_print_property_type (type),
1093                       GNUNET_i2s (&address->peer),
1094                       address, rel_value);
1095           /*
1096            * Nothing to do here, properties are considered in every reward calculation
1097            */
1098 }
1099
1100
1101 /**
1102  * Transport session for this address has changed
1103  *
1104  * NOTE: values in addresses are already updated
1105  *
1106  * @param solver solver handle
1107  * @param address the address
1108  * @param cur_session the current session
1109  * @param new_session the new session
1110  */
1111 void
1112 GAS_ril_address_session_changed (void *solver,
1113                 struct ATS_Address *address,
1114                 uint32_t cur_session,
1115                 uint32_t new_session)
1116 {
1117         //TODO? consider session changed in solver behaviour
1118         /*
1119          * Potentially add session activity as a feature in state vector
1120          */
1121         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_address_session_changed()\n");
1122 }
1123
1124
1125 /**
1126  * Usage for this address has changed
1127  *
1128  * NOTE: values in addresses are already updated
1129  *
1130  * @param solver solver handle
1131  * @param address the address
1132  * @param in_use usage state
1133  */
1134 void
1135 GAS_ril_address_inuse_changed (void *solver,
1136                 struct ATS_Address *address,
1137                 int in_use)
1138 {
1139         //TODO! consider address_inuse_changed according to matthias' email
1140         /**
1141          * See matthias' email
1142          */
1143         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1144                         "API_address_inuse_changed() Usage for %s address of peer '%s' changed to %s\n",
1145                         address->plugin,
1146                         GNUNET_i2s (&address->peer),
1147                         (GNUNET_YES == in_use) ? "USED" : "UNUSED");
1148 }
1149
1150 /**
1151  * Network scope for this address has changed
1152  *
1153  * NOTE: values in addresses are already updated
1154  *
1155  * @param solver solver handle
1156  * @param address the address
1157  * @param current_network the current network
1158  * @param new_network the new network
1159  */
1160 void
1161 GAS_ril_address_change_network (void *solver,
1162                 struct ATS_Address *address,
1163                 uint32_t current_network,
1164                 uint32_t new_network)
1165 {
1166         struct GAS_RIL_Handle *s = solver;
1167         struct RIL_Peer_Agent *agent;
1168         struct RIL_Network *net;
1169
1170         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_address_change_network() Network type changed, moving "
1171                         "%s address of peer %s from `%s' to `%s'\n",
1172                                 (GNUNET_YES == address->active) ? "active" : "inactive",
1173                                  GNUNET_i2s (&address->peer),
1174                                  GNUNET_ATS_print_network_type (current_network),
1175                                  GNUNET_ATS_print_network_type (new_network));
1176
1177         agent = ril_get_agent(s, &address->peer);
1178
1179         if (address->active)
1180         {
1181                 //remove from old network
1182                 net = ril_get_network (s, current_network);
1183                 net->bw_in_assigned -= agent->bw_in;
1184                 net->bw_out_assigned -= agent->bw_out;
1185
1186                 //add to new network
1187                 net = ril_get_network (s, new_network);
1188                 net->bw_in_assigned += agent->bw_in;
1189                 net->bw_out_assigned += agent->bw_out;
1190
1191                 GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_address_change_network() Moved %d inbound and %d "
1192                                 "outbound\n", agent->bw_in, agent->bw_out);
1193         }
1194 }
1195
1196 /**
1197  * Get application feedback for a peer
1198  *
1199  * @param solver the solver handle
1200  * @param application the application
1201  * @param peer the peer to change the preference for
1202  * @param scope the time interval for this feedback: [now - scope .. now]
1203  * @param kind the kind to change the preference
1204  * @param score the score
1205  */
1206 void
1207 GAS_ril_address_preference_feedback (void *solver,
1208                 void *application,
1209                 const struct GNUNET_PeerIdentity *peer,
1210                 const struct GNUNET_TIME_Relative scope,
1211                 enum GNUNET_ATS_PreferenceKind kind,
1212                 double score)
1213 {
1214         //TODO! collect reward until next reward calculation
1215         //TODO! Find out application
1216         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1217                         "API_address_preference_feedback() Peer '%s' got a feedback of %+.3f from application %s for "
1218                         "preference %s for %d seconds\n",
1219                         GNUNET_i2s (peer),
1220                         "UNKNOWN",
1221                         GNUNET_ATS_print_preference_type(kind),
1222                         scope.rel_value_us / 1000000);
1223 }
1224
1225 /**
1226  * Start a bulk operation
1227  *
1228  * @param solver the solver
1229  */
1230 void
1231 GAS_ril_bulk_start (void *solver)
1232 {
1233         //TODO? consideration: keep bulk counter and stop agents during bulk
1234         /*
1235          * bulk counter up, but not really relevant, because there is no complete calculation of the
1236          * bandwidth assignment triggered anyway. Therefore, changes to addresses can come and go as
1237          * they want. Consideration: Step-pause during bulk-start-stop period...
1238          */
1239         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_bulk_start()\n");
1240 }
1241
1242
1243 /**
1244  * Bulk operation done
1245  */
1246 void
1247 GAS_ril_bulk_stop (void *solver)
1248 {
1249         //TODO? consideration: keep bulk counter and stop agents during bulk
1250         /*
1251          * bulk counter down, see bulk_start()
1252          */
1253         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "API_bulk_stop()\n");
1254 }
1255
1256 /**
1257  * Get the preferred address for a specific peer
1258  *
1259  * @param solver the solver handle
1260  * @param peer the identity of the peer
1261  */
1262 const struct ATS_Address *
1263 GAS_ril_get_preferred_address (void *solver,
1264                 const struct GNUNET_PeerIdentity *peer)
1265 {
1266         /*
1267          * activate agent, return currently chosen address
1268          */
1269         struct GAS_RIL_Handle *s = solver;
1270         struct RIL_Peer_Agent *agent;
1271
1272         agent = ril_get_agent(s, peer);
1273         agent->active = GNUNET_YES;
1274
1275         GNUNET_assert (NULL != agent->address);
1276
1277         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1278                         "API_get_preferred_address() Activated agent for peer '%s' with %s address\n",
1279                         GNUNET_i2s (peer),
1280                         agent->address->plugin);
1281
1282         return agent->address;
1283 }
1284
1285 /**
1286  * Stop notifying about address and bandwidth changes for this peer
1287  *
1288  * @param solver the solver handle
1289  * @param peer the peer
1290  */
1291 void
1292 GAS_ril_stop_get_preferred_address (void *solver,
1293                 const struct GNUNET_PeerIdentity *peer)
1294 {
1295         struct GAS_RIL_Handle *s = solver;
1296         struct RIL_Peer_Agent *agent;
1297
1298         agent = ril_get_agent(s, peer);
1299         agent->active = GNUNET_NO;
1300
1301         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1302                                 "API_stop_get_preferred_address() Paused agent for peer '%s' with %s address\n",
1303                                 GNUNET_i2s (peer),
1304                                 agent->address->plugin);
1305 }
1306
1307 /* end of gnunet-service-ats-solver_ril.c */