agent_update (struct RIL_Peer_Agent *agent, double reward, double *s_next, int a_prime)
{
int i;
+ int k;
double delta;
- double *theta = agent->W[agent->a_old];
+ double **theta = agent->W;
delta = agent->envi->global_discount_integrated * reward; //reward
delta += agent->envi->global_discount_variable * agent_q (agent, s_next, a_prime); //discounted future value
// agent_q (agent, s_next, a_prime),
// delta);
- for (i = 0; i < agent->m; i++)
+ for (k = 0; k < agent->n; k++)
{
-// LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
-// agent->envi->parameters.alpha,
-// delta,
-// i,
-// agent->e[i]);
- theta[i] += agent->envi->parameters.alpha * delta * agent->E[agent->a_old][i];
+ for (i = 0; i < agent->m; i++)
+ {
+ // LOG(GNUNET_ERROR_TYPE_INFO, "alpha = %f delta = %f e[%d] = %f\n",
+ // agent->envi->parameters.alpha,
+ // delta,
+ // i,
+ // agent->e[i]);
+ theta[k][i] += agent->envi->parameters.alpha * delta * agent->E[k][i];
+ }
}
}
agent->E[action][i] += feature[i];
break;
case RIL_E_REPLACE:
- agent->E[action][i] = (agent->envi->global_discount_variable * agent->envi->parameters.lambda * agent->E[action][i]) > feature[i] ? agent->E[action][i] : feature[i];
+ agent->E[action][i] = agent->E[action][i] > feature[i] ? agent->E[action][i] : feature[i];
break;
case RIL_E_DISCOUNT:
for (k = 0; k < agent->n; k++)
return 1;
}
-static double
-envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
-{
- struct RIL_Scope *net;
- double util_ratio_in;
- double util_ratio_out;
- double util_ratio_max;
- double sigmoid_x;
-
- net = agent->address_inuse->solver_information;
-
- util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;
- util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;
- util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);
- sigmoid_x = util_ratio_max - 1;
- return 1 - (1 / (1 + exp(5 * sigmoid_x)));
-}
+//static double
+//envi_penalty_share (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
+//{
+// struct RIL_Scope *net;
+// double util_ratio_in;
+// double util_ratio_out;
+// double util_ratio_max;
+// double sigmoid_x;
+//
+// net = agent->address_inuse->solver_information;
+//
+// util_ratio_in = (double) net->bw_in_utilized / (double) net->bw_in_available;
+// util_ratio_out = (double) net->bw_out_utilized / (double) net->bw_out_available;
+// util_ratio_max = GNUNET_MAX (util_ratio_in, util_ratio_out);
+// sigmoid_x = util_ratio_max - 1;
+// return 1 - (1 / (1 + exp(5 * sigmoid_x)));
+//}
static double
envi_get_penalty (struct GAS_RIL_Handle *solver, struct RIL_Peer_Agent *agent)
net = agent->address_inuse->solver_information;
- //TODO make sure in tests to have utilization property updated
if (net->bw_in_utilized > net->bw_in_available)
{
over_in = net->bw_in_utilized - net->bw_in_available;
steady = (RIL_ACTION_NOTHING == agent->a_old) ? agent->nop_bonus : 0;
- pen_share = envi_penalty_share(solver, agent);
+ //pen_share = envi_penalty_share(solver, agent); TODO revert: pen_share is fixed at 0.5 below while envi_penalty_share is commented out
+ pen_share = 0.5;
penalty = envi_get_penalty(solver, agent);
reward = delta + steady;