src/ats/gnunet-service-ats-solver_ril.c

   1 /*
   2      This file is part of GNUnet.
   3      (C) 2011 Christian Grothoff (and other contributing authors)
   4
   5      GNUnet is free software; you can redistribute it and/or modify
   6      it under the terms of the GNU General Public License as published
   7      by the Free Software Foundation; either version 3, or (at your
   8      option) any later version.
   9
  10      GNUnet is distributed in the hope that it will be useful, but
  11      WITHOUT ANY WARRANTY; without even the implied warranty of
  12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13      General Public License for more details.
  14
  15      You should have received a copy of the GNU General Public License
  16      along with GNUnet; see the file COPYING.  If not, write to the
  17      Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  18      Boston, MA 02111-1307, USA.
  19 */
  20
  21 /**
  22  * @file ats/gnunet-service-ats-solver_ril.c
  23  * @brief ATS reinforcement learning solver
  24  * @author Fabian Oehlmann
  25  * @author Matthias Wachs
  26  */
  27 #include "platform.h"
  28 #include "float.h"
  29 #include "gnunet_util_lib.h"
  30 #include "gnunet-service-ats_addresses.h"
  31 #include "gnunet_statistics_service.h"
  32
  33 #define RIL_DEFAULT_STEP_TIME GNUNET_TIME_relative_multiply (GNUNET_TIME_UNIT_MILLISECONDS, 3000)
  34 #define RIL_DEFAULT_DISCOUNT_FACTOR 0.5
  35 #define RIL_DEFAULT_GRADIENT_STEP_SIZE 0.4
  36 #define RIL_DEFAULT_TRACE_DECAY 0.6
  37 #define RIL_EXPLORE_RATIO 0.1
  38
  39 /**
  40  * ATS reinforcement learning solver
  41  *
  42  * General description
  43  */
  44
  45 enum RIL_Action_Type
  46 {
  47         RIL_ACTION_BW_IN_DBL = 0,
  48         RIL_ACTION_BW_OUT_DBL = 1,
  49         RIL_ACTION_BW_IN_HLV = 2,
  50         RIL_ACTION_BW_OUT_HLV = 3,
  51         RIL_ACTION_TYPE_NUM = 4
  52 };
  53 //TODO! add the rest of the actions
  54
  55 enum RIL_Algorithm
  56 {
  57         RIL_ALGO_SARSA,
  58         RIL_ALGO_Q
  59 };
  60
  61 enum RIL_E_Modification
  62 {
  63         RIL_E_SET,
  64         RIL_E_ZERO,
  65         RIL_E_ACCUMULATE,
  66         RIL_E_REPLACE
  67 };
  68
  69 /**
  70  * Global learning parameters
  71  */
  72 struct RIL_Learning_Parameters
  73 {
  74         /**
  75          * The TD-algorithm to use
  76          */
  77         enum RIL_Algorithm algorithm;
  78
  79         /**
  80          * Learning discount factor in the TD-update
  81          */
  82         float gamma;
  83
  84         /**
  85          * Gradient-descent step-size
  86          */
  87         float alpha;
  88
  89         /**
  90          * Trace-decay factor for eligibility traces
  91          */
  92         float lambda;
  93 };
  94
  95 struct RIL_Peer_Agent
  96 {
  97         /**
  98          * Next agent in solver's linked list
  99          */
 100         struct RIL_Peer_Agent *next;
 101
 102         /**
 103          * Previous agent in solver's linked list
 104          */
 105         struct RIL_Peer_Agent *prev;
 106
 107         /**
 108          * Environment handle
 109          */
 110         struct GAS_RIL_Handle *envi;
 111
 112         /**
 113          * Peer ID
 114          */
 115         struct GNUNET_PeerIdentity peer;
 116
 117         /**
 118          * Whether the agent is active or not
 119          */
 120         int active;
 121
 122         /**
 123         * Number of performed time-steps
 124         */
 125         unsigned long long step_count;
 126
 127         /**
 128          * Experience matrix W
 129          */
 130         double ** W;
 131
 132         /**
 133          * Number of rows of W / Number of state-vector features
 134          */
 135         int m;
 136
 137         /**
 138          * Number of columns of W / Number of actions
 139          */
 140         int n;
 141
 142         /**
 143          * Last perceived state feature vector
 144          */
 145         double * s_old;
 146
 147         /**
 148          * Last chosen action
 149          */
 150         int a_old;
 151
 152         /**
 153          * Eligibility trace vector
 154          */
 155         double * e;
 156
 157         /**
 158          * Address in use
 159          */
 160         struct ATS_Address * address;
 161
 162         /**
 163          * Inbound bandwidth assigned by the agent
 164          */
 165         unsigned long long bw_in;
 166
 167         /**
 168          * Outbound bandwidth assigned by the agent
 169          */
 170         unsigned long long bw_out;
 171 };
 172
 173 struct RIL_Network
 174 {
 175           /**
 176            * ATS network type
 177            */
 178           enum GNUNET_ATS_Network_Type type;
 179
 180           /**
 181            * Total available inbound bandwidth
 182            */
 183           unsigned long long bw_in_available;
 184
 185           /**
 186            * Total assigned outbound bandwidth
 187            */
 188           unsigned long long bw_in_assigned;
 189
 190           /**
 191            * Total available outbound bandwidth
 192            */
 193           unsigned long long bw_out_available;
 194
 195           /**
 196            * Total assigned outbound bandwidth
 197            */
 198           unsigned long long bw_out_assigned;
 199 };
 200
 201 struct RIL_Callbacks
 202 {
 203           /**
 204            * Bandwidth changed callback
 205            */
 206           GAS_bandwidth_changed_cb bw_changed;
 207
 208           /**
 209            * Bandwidth changed callback cls
 210            */
 211           void *bw_changed_cls;
 212
 213           /**
 214            * ATS function to get preferences
 215            */
 216           GAS_get_preferences get_preferences;
 217
 218           /**
 219            * Closure for ATS function to get preferences
 220            */
 221           void *get_preferences_cls;
 222
 223           /**
 224            * ATS function to get properties
 225            */
 226           GAS_get_properties get_properties;
 227
 228           /**
 229            * Closure for ATS function to get properties
 230            */
 231           void *get_properties_cls;
 232 };
 233
 234 /**
 235  * A handle for the reinforcement learning solver
 236  */
 237 struct GAS_RIL_Handle
 238 {
 239         /**
 240         * Statistics handle
 241         */
 242         struct GNUNET_STATISTICS_Handle *stats;
 243
 244         /**
 245         * Hashmap containing all valid addresses
 246         */
 247         const struct GNUNET_CONTAINER_MultiHashMap *addresses;
 248
 249         /**
 250         * Callbacks for the solver
 251         */
 252         struct RIL_Callbacks *callbacks;
 253
 254         /**
 255         * Bulk lock
 256         */
 257         int bulk_lock;
 258
 259         /**
 260         * Number of changes while solver was locked
 261         */
 262         int bulk_requests;
 263
 264         /**
 265         * Number of performed time-steps
 266         */
 267         unsigned long long step_count;
 268
 269         /**
 270         * Interval time between steps in milliseconds //TODO? put in agent
 271         */
 272         struct GNUNET_TIME_Relative step_time;
 273
 274         /**
 275         * Task identifier of the next time-step to be executed //TODO? put in agent
 276         */
 277         GNUNET_SCHEDULER_TaskIdentifier next_step;
 278
 279         /**
 280         * Learning parameters
 281         */
 282         struct RIL_Learning_Parameters parameters;
 283
 284         /**
 285         * Array of networks with global assignment state
 286         */
 287         struct RIL_Network * network_entries;
 288
 289         /**
 290         * Networks count
 291         */
 292         unsigned int networks_count;
 293
 294         /**
 295         * List of active peer-agents
 296         */
 297         struct RIL_Peer_Agent * agents_head;
 298         struct RIL_Peer_Agent * agents_tail;
 299 };
 300
 301 /**
 302  *  Private functions
 303  *  ---------------------------
 304  */
 305
 306 /**
 307  * Estimate the current action-value for state s and action a
 308  * @param agent agent performing the estimation
 309  * @param state s
 310  * @param action a
 311  * @return estimation value
 312  */
 313 static double
 314 agent_estimate_q (struct RIL_Peer_Agent *agent,
 315                 double *state,
 316                 int action)
 317 {
 318         int i;
 319         double result = 0;
 320
 321         for (i = 0; i < agent->m; i++)
 322         {
 323                 result += state[i] * (agent->W)[agent->m][action];
 324         }
 325
 326         return result;
 327 }
 328
 329 /**
 330  * Decide whether to do exploration (i.e. taking a new action) or exploitation (i.e. taking the
 331  * currently estimated best action) in the current step
 332  * @param agent agent performing the step
 333  * @return yes, if exploring
 334  */
 335 static int
 336 agent_decide_exploration (struct RIL_Peer_Agent *agent)
 337 {
 338         double r = (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX;
 339
 340         if (r < RIL_EXPLORE_RATIO)
 341         {
 342                 return GNUNET_YES;
 343         }
 344         return GNUNET_NO;
 345 }
 346
 347 /**
 348  * Gets the action, with the maximal estimated Q-value (i.e. the one currently estimated to bring the
 349  * most reward in the future)
 350  * @param agent agent performing the calculation
 351  * @param state the state from which to take the action
 352  * @return the action promising most future reward
 353  */
 354 static int
 355 agent_get_action_best (struct RIL_Peer_Agent *agent,
 356                 double *state)
 357 {
 358         int i;
 359         int max_i = -1;
 360         double cur_q;
 361         double max_q = DBL_MIN;
 362
 363         for (i = 0; i < agent->n; i++)
 364         {
 365                 cur_q = agent_estimate_q (agent, state, i);
 366                 if (cur_q > max_q)
 367                 {
 368                         max_q = cur_q;
 369                         max_i = i;
 370                 }
 371         }
 372
 373         GNUNET_assert(-1 != max_i);
 374
 375         return max_i;
 376 }
 377
 378 /**
 379  * Gets any action, to explore the action space from that state
 380  * @param agent agent performing the calculation
 381  * @param state the state from which to take the action
 382  * @return any action
 383  */
 384 static int
 385 agent_get_action_explore (struct RIL_Peer_Agent *agent,
 386                 double *state)
 387 {
 388         return GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, agent->n);
 389 }
 390
 391 /**
 392  * Updates the weights (i.e. coefficients) of the weight vector in matrix W for action a
 393  * @param agent the agent performing the update
 394  * @param reward the reward received for the last action
 395  * @param s_next the new state, the last step got the agent into
 396  * @param a_prime the new
 397  */
 398 static void
 399 agent_update_weights (struct RIL_Peer_Agent *agent,
 400                 double reward,
 401                 double *s_next,
 402                 int a_prime)
 403 {
 404         int i;
 405         double delta;
 406         double *theta = (agent->W)[agent->a_old];
 407
 408         delta = reward + agent_estimate_q (agent, s_next, a_prime) -
 409                         agent_estimate_q (agent, agent->s_old, agent->a_old);
 410         for (i = 0; i < agent->m; i++)
 411         {
 412                 theta[i] += agent->envi->parameters.alpha * delta * (agent->e)[i];
 413         }
 414 }
 415
 416 /**
 417  * Changes the eligibility trace vector e in various manners:
 418  * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
 419  * RIL_E_REPLACE - resets each component to 1 as in replacing traces
 420  * RIL_E_SET - multiplies e with gamma and lambda as in the update rule
 421  * RIL_E_ZERO - sets e to 0 as in Watkin's Q-learning algorithm when exploring and when initializing
 422  * @param agent
 423  * @param mod
 424  */
 425 static void
 426 agent_modify_eligibility (struct RIL_Peer_Agent *agent,
 427                 enum RIL_E_Modification mod)
 428 {
 429         int i;
 430         double *e = agent->e;
 431         double gamma = agent->envi->parameters.gamma;
 432         double lambda = agent->envi->parameters.lambda;
 433
 434         for (i = 0; i < agent->m; i++)
 435         {
 436                 switch (mod)
 437                 {
 438                         case RIL_E_ACCUMULATE:
 439                                 e[i] += 1;
 440                                 break;
 441                         case RIL_E_REPLACE:
 442                                 e[i] = 1;
 443                                 break;
 444                         case RIL_E_SET:
 445                                 e[i] = gamma * lambda;
 446                                 break;
 447                         case RIL_E_ZERO:
 448                                 e[i] = 0;
 449                                 break;
 450                 }
 451         }
 452 }
 453
 454 /**
 455  * Allocates a state vector and fills it with the features present
 456  * @param solver the solver handle
 457  * @return pointer to the state vector
 458  */
 459 static double *
 460 envi_get_state (struct GAS_RIL_Handle *solver)
 461 {
 462         int i;
 463         struct RIL_Network *net;
 464         double *state = GNUNET_malloc (sizeof (double) * solver->networks_count * 4);
 465
 466         for (i = 0; i < solver->networks_count; i += 4)
 467         {
 468                 net = (&solver->network_entries)[i];
 469                 state[i]   = (double) net->bw_in_assigned;
 470                 state[i+1] = (double) net->bw_in_available;
 471                 state[i+2] = (double) net->bw_out_assigned;
 472                 state[i+3] = (double) net->bw_out_available;
 473         }
 474
 475         return state;
 476 }
 477
 478 /**
 479  * Gets the reward of the last performed step
 480  * @param solver solver handle
 481  * @return the reward
 482  */
 483 static double
 484 envi_get_reward (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
 485 {
 486         //TODO! implement reward calculation
 487
 488         return (double) GNUNET_CRYPTO_random_u32(GNUNET_CRYPTO_QUALITY_WEAK, UINT32_MAX) / (double) UINT32_MAX;
 489 }
 490
 491 static void
 492 envi_action_bw_double (struct GAS_RIL_Handle *solver,
 493                 struct RIL_Peer_Agent *agent,
 494                 int direction_in)
 495 {
 496         if (direction_in)
 497         {
 498                 agent->bw_in *= 2;
 499                 agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 500                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 501         }
 502         else
 503         {
 504                 agent->bw_out *= 2;
 505                 agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 506                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 507         }
 508 }
 509
 510 static void
 511 envi_action_bw_halven (struct GAS_RIL_Handle *solver,
 512                 struct RIL_Peer_Agent *agent,
 513                 int direction_in)
 514 {
 515         if ((direction_in && 1 == agent->bw_in) ||
 516                         (!direction_in && 1 == agent->bw_out))
 517         {
 518                 return;
 519         }
 520         if (direction_in)
 521         {
 522                 agent->bw_in /= 2;
 523                 agent->address->assigned_bw_in.value__ = htonl (agent->bw_in);
 524                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 525         }
 526         else
 527         {
 528                 agent->bw_out /= 2;
 529                 agent->address->assigned_bw_out.value__ = htonl (agent->bw_out);
 530                 solver->callbacks->bw_changed (solver->callbacks->bw_changed_cls, agent->address);
 531         }
 532 }
 533
 534 /**
 535  * Puts the action into effect
 536  * @param solver solver handle
 537  * @param action action to perform by the solver
 538  */
 539 static void
 540 envi_do_action (struct GAS_RIL_Handle *solver,
 541                 struct RIL_Peer_Agent *agent,
 542                 int action)
 543 {
 544         switch (action)
 545         {
 546                 case RIL_ACTION_BW_IN_DBL:
 547                         envi_action_bw_double (solver, agent, GNUNET_YES);
 548                         break;
 549                 case RIL_ACTION_BW_IN_HLV:
 550                         envi_action_bw_halven (solver, agent, GNUNET_YES);
 551                         break;
 552                 case RIL_ACTION_BW_OUT_DBL:
 553                         envi_action_bw_double (solver, agent, GNUNET_NO);
 554                         break;
 555                 case RIL_ACTION_BW_OUT_HLV:
 556                         envi_action_bw_halven (solver, agent, GNUNET_NO);
 557                         break;
 558         }
 559 }
 560
 561 /**
 562  * Performs one step of the Markov Decision Process. Other than in the literature the step starts
 563  * after having done the last action a_old. It observes the new state s_next and the reward
 564  * received. Then the coefficient update is done according to the SARSA or Q-learning method. The
 565  * next action is put into effect.
 566  * @param agent the agent performing the step
 567  */
 568 static void
 569 agent_step (struct RIL_Peer_Agent *agent)
 570 {
 571         int a_next = -1;
 572         double *s_next;
 573         double reward;
 574
 575         s_next = envi_get_state(agent->envi);
 576         reward = envi_get_reward(agent->envi, agent);
 577
 578         switch (agent->envi->parameters.algorithm)
 579         {
 580                 case RIL_ALGO_SARSA:
 581                         agent_modify_eligibility (agent, RIL_E_SET);
 582                         if (agent_decide_exploration (agent))
 583                         {
 584                                 a_next = agent_get_action_explore (agent, s_next);
 585                         }
 586                         else
 587                         {
 588                                 a_next = agent_get_action_best (agent, s_next);
 589                         }
 590                         //updates weights with selected action (on-policy)
 591                         agent_update_weights (agent, reward, s_next, a_next);
 592                         break;
 593
 594                 case RIL_ALGO_Q:
 595                         //updates weights with best action, disregarding actually selected action (off-policy)
 596                         a_next = agent_get_action_best (agent, s_next);
 597                         agent_update_weights (agent, reward, s_next, a_next);
 598                         if (agent_decide_exploration (agent))
 599                         {
 600                                 a_next = agent_get_action_explore (agent, s_next);
 601                                 agent_modify_eligibility(agent, RIL_E_ZERO);
 602                         }
 603                         else
 604                         {
 605                                 a_next = agent_get_action_best (agent, s_next);
 606                                 agent_modify_eligibility(agent, RIL_E_SET);
 607                         }
 608                         break;
 609         }
 610
 611         GNUNET_assert (-1 != a_next);
 612
 613         agent_modify_eligibility (agent, RIL_E_ACCUMULATE);
 614
 615         envi_do_action(agent->envi, agent, a_next);
 616
 617         GNUNET_free(agent->s_old);
 618         agent->s_old = s_next;
 619         agent->a_old = a_next;
 620
 621         agent->step_count += 1;
 622 }
 623
 624 /**
 625  * Cycles through all agents and lets the active ones do a step. Schedules the next step.
 626  * @param solver the solver handle
 627  * @param tc task context for the scheduler
 628  */
 629 static void
 630 ril_periodic_step (void *cls,
 631                                 const struct GNUNET_SCHEDULER_TaskContext *tc)
 632 {
 633         struct GAS_RIL_Handle *solver = cls;
 634         struct RIL_Peer_Agent *cur;
 635
 636         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL step number %d\n", solver->step_count);
 637
 638         for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 639         {
 640                 if (cur->active)
 641                 {
 642                         agent_step (cur);
 643                 }
 644         }
 645
 646         solver->step_count += 1;
 647         solver->next_step = GNUNET_SCHEDULER_add_delayed (
 648                         solver->step_time,
 649                         &ril_periodic_step,
 650                         solver);
 651 }
 652
 653 /**
 654  * Initialize an agent without addresses and its knowledge base
 655  * @param s ril solver
 656  * @param peer the one in question
 657  * @return handle to the new agent
 658  */
 659 static struct RIL_Peer_Agent *
 660 agent_init (void *s,
 661                 const struct GNUNET_PeerIdentity *peer)
 662 {
 663         int i;
 664         struct GAS_RIL_Handle * solver = s;
 665         struct RIL_Peer_Agent * agent = GNUNET_malloc (sizeof (struct RIL_Peer_Agent));
 666
 667         agent->envi = solver;
 668         agent->peer = *peer;
 669         agent->step_count = 0;
 670         agent->active = GNUNET_NO;
 671         agent->s_old = NULL;
 672         agent->n = RIL_ACTION_TYPE_NUM;
 673         agent->m = solver->networks_count * 4;
 674         agent->W = (double **) GNUNET_malloc (sizeof (double) * agent->n);
 675         for (i = 0; i < agent->n; i++)
 676         {
 677                 (agent->W)[i] = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 678         }
 679         agent->a_old = -1;
 680         agent->e = (double *) GNUNET_malloc (sizeof (double) * agent->m);
 681         agent_modify_eligibility (agent, RIL_E_ZERO);
 682
 683         GNUNET_CONTAINER_DLL_insert_tail (solver->agents_head, solver->agents_tail, agent);
 684
 685         return agent;
 686 }
 687
 688 /**
 689  * Deallocate agent
 690  * @param s solver handle
 691  * @param agent the agent to retire
 692  */
 693 static void
 694 agent_die (struct GAS_RIL_Handle *solver,
 695                 struct RIL_Peer_Agent *agent)
 696 {
 697         int i;
 698
 699         for (i = 0; i < agent->n; i++)
 700         {
 701                 GNUNET_free((agent->W)[i]);
 702         }
 703         GNUNET_free(agent->W);
 704         GNUNET_free(agent->e);
 705         GNUNET_free(agent->s_old);
 706 }
 707
 708 /**
 709  * Returns the agent for a peer
 710  * @param s solver handle
 711  * @param peer identity of the peer
 712  * @return agent
 713  */
 714 static struct RIL_Peer_Agent *
 715 ril_get_agent (struct GAS_RIL_Handle *solver,
 716                 const struct GNUNET_PeerIdentity *peer)
 717 {
 718         struct RIL_Peer_Agent *cur;
 719
 720         for (cur = solver->agents_head; NULL != cur; cur = cur->next)
 721         {
 722                 if (0 == GNUNET_CRYPTO_hash_cmp (&peer->hashPubKey, &cur->peer.hashPubKey))
 723                 {
 724                         return cur;
 725                 }
 726         }
 727
 728         return agent_init (solver, peer);
 729 }
 730
 731 /**
 732  * Iterator, which allocates one agent per peer
 733  *
 734  * @param cls solver
 735  * @param key peer identity
 736  * @param value address
 737  * @return whether iterator should continue
 738  */
 739 static int
 740 ril_init_agents_it (void *cls,
 741                                 const struct GNUNET_HashCode *key,
 742                                 void *value)
 743 {
 744         struct GAS_RIL_Handle *solver = cls;
 745         struct ATS_Address *address = value;
 746         struct RIL_Peer_Agent *agent;
 747         uint32_t min_bw = ntohl (GNUNET_CONSTANTS_DEFAULT_BW_IN_OUT.value__);
 748
 749         agent = ril_get_agent (solver, &address->peer);
 750
 751         GNUNET_assert (NULL != agent);
 752
 753         if (NULL == agent->address)
 754         {
 755                 agent->address = address;
 756                 agent->address->active = GNUNET_YES;
 757                 agent->bw_in = min_bw;
 758                 agent->address->assigned_bw_in.value__ = htonl (min_bw);
 759                 agent->bw_out = min_bw;
 760                 agent->address->assigned_bw_out.value__ = htonl (min_bw);
 761         }
 762
 763         return GNUNET_YES;
 764 }
 765
 766 /**
 767  * Lookup network struct by type
 768  *
 769  * @param s the solver handle
 770  * @param type the network type
 771  * @return the network struct
 772  */
 773 static struct RIL_Network *
 774 ril_get_network (struct GAS_RIL_Handle *s, uint32_t type)
 775 {
 776   int i;
 777   for (i = 0 ; i < s->networks_count; i++)
 778   {
 779       if (s->network_entries[i].type == type)
 780         return &s->network_entries[i];
 781
 782   }
 783   return NULL;
 784 }
 785
 786
 787 /**
 788  *  Solver API functions
 789  *  ---------------------------
 790  */
 791
 792 /**
 793  * Changes the preferences for a peer in the problem
 794  *
 795  * @param solver the solver handle
 796  * @param peer the peer to change the preference for
 797  * @param kind the kind to change the preference
 798  * @param pref_rel the normalized preference value for this kind over all clients
 799  */
 800 void
 801 GAS_ril_address_change_preference (void *s,
 802                 const struct GNUNET_PeerIdentity *peer,
 803                 enum GNUNET_ATS_PreferenceKind kind,
 804                 double pref_rel)
 805 {
 806           GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
 807                       "Preference `%s' for peer `%s' changed to %.2f \n",
 808                       GNUNET_ATS_print_preference_type (kind),
 809                       GNUNET_i2s (peer),
 810                       pref_rel);
 811           /*
 812            * Nothing to do here. Preferences are considered during reward calculation.
 813            */
 814 }
 815
 816
 817 /**
 818  * Init the reinforcement learning problem solver
 819  *
 820  * Quotas:
 821  * network[i] contains the network type as type GNUNET_ATS_NetworkType[i]
 822  * out_quota[i] contains outbound quota for network type i
 823  * in_quota[i] contains inbound quota for network type i
 824  *
 825  * Example
 826  * network = {GNUNET_ATS_NET_UNSPECIFIED, GNUNET_ATS_NET_LOOPBACK, GNUNET_ATS_NET_LAN, GNUNET_ATS_NET_WAN, GNUNET_ATS_NET_WLAN}
 827  * network[2]   == GNUNET_ATS_NET_LAN
 828  * out_quota[2] == 65353
 829  * in_quota[2]  == 65353
 830  *
 831  * @param cfg configuration handle
 832  * @param stats the GNUNET_STATISTICS handle
 833  * @param network array of GNUNET_ATS_NetworkType with length dest_length
 834  * @param addresses hashmap containing all addresses
 835  * @param out_quota array of outbound quotas
 836  * @param in_quota array of outbound quota
 837  * @param dest_length array length for quota arrays
 838  * @param bw_changed_cb callback for changed bandwidth amounts
 839  * @param bw_changed_cb_cls cls for callback
 840  * @param get_preference callback to get relative preferences for a peer
 841  * @param get_preference_cls cls for callback to get relative preferences
 842  * @param get_properties_cls for callback to get relative properties
 843  * @param get_properties_cls cls for callback to get relative properties
 844  * @return handle for the solver on success, NULL on fail
 845  */
 846 void *
 847 GAS_ril_init (const struct GNUNET_CONFIGURATION_Handle *cfg,
 848                 const struct GNUNET_STATISTICS_Handle *stats,
 849                 const struct GNUNET_CONTAINER_MultiHashMap *addresses,
 850                 int *network,
 851                 unsigned long long *out_quota,
 852                 unsigned long long *in_quota,
 853                 int dest_length,
 854                 GAS_bandwidth_changed_cb bw_changed_cb,
 855                 void *bw_changed_cb_cls,
 856                 GAS_get_preferences get_preference,
 857                 void *get_preference_cls,
 858                 GAS_get_properties get_properties,
 859                 void *get_properties_cls)
 860 {
 861         int c;
 862         unsigned long long tmp;
 863         struct RIL_Network * cur;
 864         struct GAS_RIL_Handle *solver = GNUNET_malloc (sizeof (struct GAS_RIL_Handle));
 865
 866         GNUNET_assert (NULL != cfg);
 867         GNUNET_assert (NULL != stats);
 868         GNUNET_assert (NULL != network);
 869         GNUNET_assert (NULL != bw_changed_cb);
 870         GNUNET_assert (NULL != get_preference);
 871         GNUNET_assert (NULL != get_properties);
 872
 873         if (GNUNET_OK != GNUNET_CONFIGURATION_get_value_time(cfg, "ats", "RIL_STEP_TIME", &solver->step_time))
 874         {
 875                 solver->step_time = RIL_DEFAULT_STEP_TIME;
 876         }
 877         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_DISCOUNT_FACTOR", &tmp))
 878         {
 879                 solver->parameters.gamma = (double) tmp / 100;;
 880         }
 881         else
 882         {
 883                 solver->parameters.gamma = RIL_DEFAULT_DISCOUNT_FACTOR;
 884         }
 885         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_GRADIENT_STEP_SIZE", &tmp))
 886         {
 887                 solver->parameters.alpha = (double) tmp / 100;;
 888         }
 889         else
 890         {
 891                 solver->parameters.alpha = RIL_DEFAULT_GRADIENT_STEP_SIZE;
 892         }
 893         if (GNUNET_OK == GNUNET_CONFIGURATION_get_value_size(cfg, "ats", "RIL_TRACE_DECAY", &tmp))
 894         {
 895                 solver->parameters.lambda = (double) tmp / 100;;
 896         }
 897         else
 898         {
 899                 solver->parameters.lambda = RIL_DEFAULT_TRACE_DECAY;
 900         }
 901
 902         solver->stats = (struct GNUNET_STATISTICS_Handle *) stats;
 903         solver->callbacks = GNUNET_malloc (sizeof (struct RIL_Callbacks));
 904         solver->callbacks->bw_changed = bw_changed_cb;
 905         solver->callbacks->bw_changed_cls = bw_changed_cb_cls;
 906         solver->callbacks->get_preferences = get_preference;
 907         solver->callbacks->get_preferences_cls = get_preference_cls;
 908         solver->callbacks->get_properties = get_properties;
 909         solver->callbacks->get_properties_cls = get_properties_cls;
 910         solver->networks_count = dest_length;
 911         solver->network_entries = GNUNET_malloc (dest_length * sizeof (struct RIL_Network));
 912         solver->bulk_lock = GNUNET_NO;
 913         solver->addresses = addresses;
 914         solver->step_count = 0;
 915
 916         for (c = 0; c < dest_length; c++)
 917         {
 918                 cur = &solver->network_entries[c];
 919                 cur->type = network[c];
 920                 cur->bw_in_available = in_quota[c];
 921                 cur->bw_in_assigned = 0;
 922                 cur->bw_out_available = out_quota[c];
 923                 cur->bw_out_assigned = 0;
 924         }
 925
 926         c = GNUNET_CONTAINER_multihashmap_iterate (addresses, &ril_init_agents_it, solver);
 927
 928         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_init() has been called\n");
 929         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "RIL number of addresses: %d\n", c);
 930
 931         solver->next_step = GNUNET_SCHEDULER_add_delayed (
 932                                 GNUNET_TIME_relative_multiply (GNUNET_TIME_relative_get_millisecond_ (), 1000),
 933                                 &ril_periodic_step,
 934                                 solver);
 935
 936         return solver;
 937 }
 938
 939 /**
 940  * Shutdown the reinforcement learning problem solver
 941  *
 942  * @param solver the respective handle to shutdown
 943  */
 944 void
 945 GAS_ril_done (void * solver)
 946 {
 947         struct GAS_RIL_Handle *s = solver;
 948         struct RIL_Peer_Agent *cur_agent;
 949         struct RIL_Peer_Agent *next_agent;
 950
 951         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_done() has been called\n");
 952
 953         cur_agent = s->agents_head;
 954         while (NULL != cur_agent)
 955         {
 956                 next_agent = cur_agent->next;
 957                 GNUNET_CONTAINER_DLL_remove (s->agents_head, s->agents_tail, cur_agent);
 958                 agent_die (s, cur_agent);
 959                 cur_agent = next_agent;
 960         }
 961
 962         GNUNET_SCHEDULER_cancel (s->next_step);
 963         GNUNET_free (s->callbacks);
 964         GNUNET_free (s->network_entries);
 965         GNUNET_free (s);
 966 }
 967
 968
 969 /**
 970  * Add a single address within a network to the solver
 971  *
 972  * @param solver the solver Handle
 973  * @param address the address to add
 974  * @param network network type of this address
 975  */
 976 void
 977 GAS_ril_address_add (void *solver,
 978                 struct ATS_Address *address,
 979                 uint32_t network)
 980 {
 981         struct GAS_RIL_Handle *s = solver;
 982         //TODO! implement solver address add
 983         /*
 984          * if (new peer)
 985          *     initialize new agent
 986          * Add address
 987          * increase state vector
 988          * knowledge matrix
 989          * and action vector
 990          */
 991
 992         /*
 993          * reiterate all addresses, create new agent if necessary and give the agent the address
 994          */
 995         GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it, solver);
 996
 997         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_add() has been called\n");
 998 }
 999
1000 /**
1001  * Remove an address from the solver
1002  *
1003  * @param solver the solver handle
1004  * @param address the address to remove
1005  * @param session_only delete only session not whole address
1006  */
1007 void
1008 GAS_ril_address_delete (void *solver,
1009                 struct ATS_Address *address,
1010                 int session_only)
1011 {
1012         //TODO! implement solver address delete
1013         /*
1014          * remove address
1015          * if (last address of peer)
1016          *     remove agent
1017          * else
1018          *     decrease state vector
1019          *     decrease knowledge matrix
1020          *     decrease action vector
1021          */
1022         struct GAS_RIL_Handle *s = solver;
1023         struct RIL_Peer_Agent *agent;
1024
1025         agent = ril_get_agent(s, &address->peer);
1026
1027         if (0 == memcmp (agent->address->addr, address->addr, address->addr_len)) //if used address deleted
1028         {
1029                 agent->address = NULL; //delete address
1030                 GNUNET_CONTAINER_multihashmap_iterate (s->addresses, &ril_init_agents_it, solver); //put another address
1031                 if (NULL == agent->address) //no other address available
1032                 {
1033                         agent->active = GNUNET_NO;
1034                 }
1035         }
1036
1037         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_delete() has been called\n");
1038 }
1039
1040 /**
1041  * Transport properties for this address have changed
1042  *
1043  * @param solver solver handle
1044  * @param address the address
1045  * @param type the ATSI type in HBO
1046  * @param abs_value the absolute value of the property
1047  * @param rel_value the normalized value
1048  */
1049 void
1050 GAS_ril_address_property_changed (void *solver,
1051                 struct ATS_Address *address,
1052                 uint32_t type,
1053                 uint32_t abs_value,
1054                 double rel_value)
1055 {
1056           GNUNET_log (GNUNET_ERROR_TYPE_DEBUG,
1057                       "Property `%s' for peer `%s' address %p changed to %.2f \n",
1058                       GNUNET_ATS_print_property_type (type),
1059                       GNUNET_i2s (&address->peer),
1060                       address, rel_value);
1061           /*
1062            * Nothing to do here, properties are considered in every reward calculation
1063            */
1064 }
1065
1066
1067 /**
1068  * Transport session for this address has changed
1069  *
1070  * NOTE: values in addresses are already updated
1071  *
1072  * @param solver solver handle
1073  * @param address the address
1074  * @param cur_session the current session
1075  * @param new_session the new session
1076  */
1077 void
1078 GAS_ril_address_session_changed (void *solver,
1079                 struct ATS_Address *address,
1080                 uint32_t cur_session,
1081                 uint32_t new_session)
1082 {
1083         //TODO? consider session changed in solver behaviour
1084         /*
1085          * Potentially add session activity as a feature in state vector
1086          */
1087         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_session_changed() has been called\n");
1088 }
1089
1090
1091 /**
1092  * Usage for this address has changed
1093  *
1094  * NOTE: values in addresses are already updated
1095  *
1096  * @param solver solver handle
1097  * @param address the address
1098  * @param in_use usage state
1099  */
1100 void
1101 GAS_ril_address_inuse_changed (void *solver,
1102                 struct ATS_Address *address,
1103                 int in_use)
1104 {
1105         //TODO! consider address_inuse_changed according to matthias' email
1106         /**
1107          * See matthias' email
1108          */
1109         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_inuse_changed() has been called\n");
1110 }
1111
1112 /**
1113  * Network scope for this address has changed
1114  *
1115  * NOTE: values in addresses are already updated
1116  *
1117  * @param solver solver handle
1118  * @param address the address
1119  * @param current_network the current network
1120  * @param new_network the new network
1121  */
1122 void
1123 GAS_ril_address_change_network (void *solver,
1124                 struct ATS_Address *address,
1125                 uint32_t current_network,
1126                 uint32_t new_network)
1127 {
1128         struct GAS_RIL_Handle *s = solver;
1129         struct RIL_Peer_Agent *agent;
1130         struct RIL_Network *net;
1131
1132         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "Network type changed, moving %s address from `%s' to `%s'\n",
1133                                 (GNUNET_YES == address->active) ? "active" : "inactive",
1134                                  GNUNET_ATS_print_network_type (current_network),
1135                                  GNUNET_ATS_print_network_type (new_network));
1136
1137         agent = ril_get_agent(s, &address->peer);
1138
1139         if (address->active)
1140         {
1141                 //remove from old network
1142                 net = ril_get_network (s, current_network);
1143                 net->bw_in_assigned -= agent->bw_in;
1144                 net->bw_out_assigned -= agent->bw_out;
1145
1146                 //add to new network
1147                 net = ril_get_network (s, new_network);
1148                 net->bw_in_assigned += agent->bw_in;
1149                 net->bw_out_assigned += agent->bw_out;
1150         }
1151 }
1152
1153 /**
1154  * Get application feedback for a peer
1155  *
1156  * @param solver the solver handle
1157  * @param application the application
1158  * @param peer the peer to change the preference for
1159  * @param scope the time interval for this feedback: [now - scope .. now]
1160  * @param kind the kind to change the preference
1161  * @param score the score
1162  */
1163 void
1164 GAS_ril_address_preference_feedback (void *solver,
1165                 void *application,
1166                 const struct GNUNET_PeerIdentity *peer,
1167                 const struct GNUNET_TIME_Relative scope,
1168                 enum GNUNET_ATS_PreferenceKind kind,
1169                 double score)
1170 {
1171         //TODO! collect reward until next reward calculation
1172         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_address_preference_feedback() has been called\n");
1173 }
1174
1175 /**
1176  * Start a bulk operation
1177  *
1178  * @param solver the solver
1179  */
1180 void
1181 GAS_ril_bulk_start (void *solver)
1182 {
1183         //TODO? consideration: keep bulk counter and stop agents during bulk
1184         /*
1185          * bulk counter up, but not really relevant, because there is no complete calculation of the
1186          * bandwidth assignment triggered anyway. Therefore, changes to addresses can come and go as
1187          * they want. Consideration: Step-pause during bulk-start-stop period...
1188          */
1189         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_bulk_start() has been called\n");
1190 }
1191
1192
1193 /**
1194  * Bulk operation done
1195  */
1196 void
1197 GAS_ril_bulk_stop (void *solver)
1198 {
1199         //TODO? consideration: keep bulk counter and stop agents during bulk
1200         /*
1201          * bulk counter down, see bulk_start()
1202          */
1203         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_bulk_stop() has been called\n");
1204 }
1205
1206 /**
1207  * Get the preferred address for a specific peer
1208  *
1209  * @param solver the solver handle
1210  * @param peer the identity of the peer
1211  */
1212 const struct ATS_Address *
1213 GAS_ril_get_preferred_address (void *solver,
1214                 const struct GNUNET_PeerIdentity *peer)
1215 {
1216         /*
1217          * activate agent, return currently chosen address
1218          */
1219         struct GAS_RIL_Handle *s = solver;
1220         struct RIL_Peer_Agent *agent;
1221
1222         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_get_preferred_address() has been called\n");
1223
1224         agent = ril_get_agent(s, peer);
1225         agent->active = GNUNET_YES;
1226
1227         GNUNET_assert (NULL != agent->address);
1228
1229         return agent->address;
1230 }
1231
1232 /**
1233  * Stop notifying about address and bandwidth changes for this peer
1234  *
1235  * @param solver the solver handle
1236  * @param peer the peer
1237  */
1238 void
1239 GAS_ril_stop_get_preferred_address (void *solver,
1240                 const struct GNUNET_PeerIdentity *peer)
1241 {
1242         struct GAS_RIL_Handle *s = solver;
1243         struct RIL_Peer_Agent *agent;
1244
1245         GNUNET_log (GNUNET_ERROR_TYPE_DEBUG, "ril_stop_get_preferred_address() has been called\n");
1246
1247         agent = ril_get_agent(s, peer);
1248         agent->active = GNUNET_NO;
1249 }
1250
1251 /* end of gnunet-service-ats-solver_ril.c */