-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathplot.py
More file actions
109 lines (89 loc) · 3.26 KB
/
plot.py
File metadata and controls
109 lines (89 loc) · 3.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import matplotlib.pyplot as plt
import numpy as np
"""
This module defines functions that plot the results of the different experiments.
"""
def plot_regret_advertising(opt, reward_per_experiment):
    """
    Draw the cumulative regret and the reward-versus-optimum figures.

    Figure 0 shows the cumulative sum of ``opt - reward_per_experiment``
    averaged along axis 0. Figure 1 shows ``reward_per_experiment`` averaged
    along axis 0 together with a constant line at ``opt``. Both figures are
    displayed with a single ``plt.show()`` call at the end.

    NOTE(review): presumably axis 0 indexes the experiments and axis 1 the
    rounds -- confirm against the caller.
    """
    # Figure 0: cumulative regret of the mean run.
    plt.figure(0)
    plt.ylabel("Regret")
    plt.xlabel("t")
    mean_regret = np.mean(opt - reward_per_experiment, axis=0)
    cumulative_regret = np.cumsum(mean_regret)
    plt.plot(cumulative_regret, "r")

    # Figure 1: mean collected reward against the optimum.
    plt.figure(1)
    plt.ylabel("Reward")
    plt.xlabel("t")
    average_reward = np.mean(reward_per_experiment, axis=0)
    # Stretch the optimum into one value per plotted round.
    optimal_line = np.ones(len(average_reward)) * opt
    plt.plot(optimal_line, "r", label="Optimal Reward")
    plt.plot(average_reward, "b", label="GPTS Reward")
    plt.legend(loc="lower right")

    plt.show()
def plot_regret_comparison(opt_per_phase, reward_per_experiment, sw_reward_per_experiment):
    """
    Plot the regret of GPTS and SW_GPTS in a non-stationary environment,
    and the SW_GPTS reward against the per-round optimum.

    Figure 0 holds the two cumulative regret curves; figure 2 holds the
    per-round optimum and the reward of ``sw_reward_per_experiment`` averaged
    along axis 0. Both figures are displayed with one ``plt.show()`` call.

    Args:
        opt_per_phase: sequence with one optimal reward per phase.
        reward_per_experiment: rewards of the stationary learner; the length
            of its first row is taken as the number of rounds.
            NOTE(review): presumably shaped (experiments, rounds) -- confirm.
        sw_reward_per_experiment: rewards of the sliding-window learner, to be
            broadcast against the per-round optimum in the same way.
    """
    n_rounds = len(reward_per_experiment[0])
    n_phases = len(opt_per_phase)
    phase_len = n_rounds // n_phases

    # Expand the per-phase optimum into one value per round.
    opt_per_round = np.zeros(n_rounds)
    for i in range(n_phases):
        opt_per_round[i * phase_len:(i + 1) * phase_len] = opt_per_phase[i]
    # When the horizon is not a multiple of the number of phases, the rounds
    # left over would otherwise keep an optimum of 0 and distort the regret;
    # treat them as part of the last phase.
    opt_per_round[n_phases * phase_len:] = opt_per_phase[-1]

    cum_regret = np.cumsum(np.mean(opt_per_round - reward_per_experiment, axis=0))
    sw_cum_regret = np.cumsum(np.mean(opt_per_round - sw_reward_per_experiment, axis=0))

    plt.figure(0)
    plt.ylabel("Regret")
    plt.xlabel("t")
    plt.plot(cum_regret, "r", label='Stationary Regret')
    plt.plot(sw_cum_regret, "b", label='SW Regret')
    plt.legend(loc='lower right')

    plt.figure(2)
    plt.ylabel("Reward")
    plt.xlabel("t")
    mean_reward = np.mean(sw_reward_per_experiment, axis=0)
    plt.plot(opt_per_round, 'r', label='Optimal Reward')
    # The curve is the sliding-window learner's reward, so label it as such.
    plt.plot(mean_reward, 'b', label='SW_GPTS Reward')
    plt.legend(loc='lower right')

    plt.show()
def plot_cum_regret(opt, algorithm):
    """
    Plot the cumulative regret and the mean reward of an algorithm.

    Figure 0 shows the cumulative sum of ``opt - algorithm`` averaged along
    axis 0. Figure 2 shows ``algorithm`` averaged along axis 0 together with
    a constant line at ``opt``. Both figures are built first and then
    displayed with a single ``plt.show()`` call, as the other plotting
    functions in this module do.

    Args:
        opt: optimal reward, broadcast against ``algorithm``.
        algorithm: rewards collected by the algorithm.
            NOTE(review): presumably shaped (experiments, rounds) -- confirm.
    """
    plt.figure(0)
    plt.ylabel("Regret")
    plt.xlabel("t")
    plt.plot(np.cumsum(np.mean(opt - algorithm, axis=0)), 'r')

    plt.figure(2)
    plt.ylabel("Reward")
    plt.xlabel("t")
    mean_reward = np.mean(algorithm, axis=0)
    # Stretch the optimum into one value per plotted round.
    optimal_reward = np.ones(len(mean_reward)) * opt
    plt.plot(optimal_reward, 'r', label='Optimal Reward')
    plt.plot(mean_reward, 'b', label='TS Reward')
    plt.legend(loc='lower right')

    plt.show()
def plot_regret_per_arm(opt, algorithm, plot_info):
    """
    Plot one cumulative regret curve per entry of ``plot_info``.

    ``algorithm`` is averaged along axis 0; row ``i`` of the result is
    subtracted from ``opt``, cumulatively summed, and plotted with the
    formatted ``plot_info[i]`` as its legend label.

    Args:
        opt: optimal value, broadcast against each averaged row.
        algorithm: rewards to average along axis 0.
            NOTE(review): presumably shaped (experiments, arms, days) --
            confirm against the caller.
        plot_info: sequence of legend labels, one per plotted row.
    """
    plt.figure(0)
    plt.ylabel("Regret comparison")
    plt.xlabel("Days")
    arms = np.mean(algorithm, axis=0)
    for index, arm in enumerate(plot_info):
        plt.plot(np.cumsum(opt - arms[index]), label='{arm}'.format(arm=arm))
    plt.legend(loc='best')
    plt.show()