+/**
+ * Picks an exploratory action: the index is drawn with weak-quality
+ * randomness via GNUNET_CRYPTO_random_u32(), bounded by the agent's
+ * action count (agent->n), without consulting any value estimates.
+ *
+ * @param agent agent performing the calculation; only agent->n is read
+ * @param state the state from which to take the action (currently unused)
+ * @return index of the randomly chosen action
+ */
+int
+agent_get_action_explore (struct RIL_Peer_Agent *agent,
+                          double *state)
+{
+  int explored_action;
+
+  explored_action = GNUNET_CRYPTO_random_u32 (GNUNET_CRYPTO_QUALITY_WEAK,
+                                              agent->n);
+  return explored_action;
+}
+
+/**
+ * Updates the weights (i.e. coefficients) of the weight vector in matrix W
+ * for the previously taken action (agent->a_old), using the
+ * temporal-difference error and the eligibility trace vector e:
+ *
+ *   delta        = reward + gamma * Q(s_next, a_prime) - Q(s_old, a_old)
+ *   W[a_old][i] += alpha * delta * e[i]        for all i in [0, m)
+ *
+ * @param agent the agent performing the update
+ * @param reward the reward received for the last action
+ * @param s_next the new state, the last step got the agent into
+ * @param a_prime the action whose estimated value in s_next serves as the
+ *        update target
+ */
+void
+agent_update_weights (struct RIL_Peer_Agent *agent,
+                      double reward,
+                      double *s_next,
+                      int a_prime)
+{
+  int i;
+  double delta;
+  double alpha = agent->envi->parameters.alpha;
+  double gamma = agent->envi->parameters.gamma;
+  double *theta = (agent->W)[agent->a_old];
+
+  /* The estimate for the successor state has to be discounted by gamma;
+   * leaving the factor out makes the update inconsistent with the
+   * gamma * lambda decay applied to the eligibility traces. */
+  delta = reward + gamma * agent_estimate_q (agent, s_next, a_prime)
+          - agent_estimate_q (agent, agent->s_old, agent->a_old);
+  for (i = 0; i < agent->m; i++)
+  {
+    theta[i] += alpha * delta * (agent->e)[i];
+  }
+}
+
+/**
+ * Changes the eligibility trace vector e in various manners:
+ * RIL_E_ACCUMULATE - adds 1 to each component as in accumulating eligibility traces
+ * RIL_E_REPLACE - resets each component to 1 as in replacing traces
+ * RIL_E_SET - multiplies each component with gamma * lambda (trace decay)
+ *             as in the update rule
+ * RIL_E_ZERO - sets e to 0 as in Watkins's Q-learning algorithm when
+ *              exploring and when initializing
+ *
+ * All agent->m components of the vector are modified in the same way.
+ *
+ * @param agent the agent whose eligibility trace vector is modified
+ * @param mod the kind of modification to apply
+ */
+void
+agent_modify_eligibility (struct RIL_Peer_Agent *agent,
+                          enum RIL_E_Modification mod)
+{
+  int i;
+  double *e = agent->e;
+  double gamma = agent->envi->parameters.gamma;
+  double lambda = agent->envi->parameters.lambda;
+  double decay = gamma * lambda;
+
+  for (i = 0; i < agent->m; i++)
+  {
+    switch (mod)
+    {
+    case RIL_E_ACCUMULATE:
+      e[i] += 1;
+      break;
+    case RIL_E_REPLACE:
+      e[i] = 1;
+      break;
+    case RIL_E_SET:
+      /* Decay the existing trace; plain assignment would discard the
+       * credit accumulated in earlier steps. */
+      e[i] *= decay;
+      break;
+    case RIL_E_ZERO:
+      e[i] = 0;
+      break;
+    }
+  }
+}
+
+/**
+ * Allocates a state vector and fills it with the features present
+ * @param solver the solver handle
+ * @return pointer to the state vector
+ */