-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathLoadDataset.py
More file actions
107 lines (85 loc) · 3.48 KB
/
Copy pathLoadDataset.py
File metadata and controls
107 lines (85 loc) · 3.48 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import sys
import time
import pickle
import random
import numpy as np
# Dataset geometry shared by the loader functions in this module.
# Number of label classes (passed to load_data() as the label count).
classes = 10
# Height and width, in pixels, used when reshaping flat samples into images.
img_size = 32
# Number of colour channels used when reshaping flat samples into images.
img_channels = 3
# How to load data:
#   train_X, train_lab, test_X, test_lab = get_data()
#   train_X, test_X = normalize(train_X, test_X)
def unpickle(file):
    """Load one pickled dataset batch and return its samples and labels.

    Args:
        file: Path of the pickle file to read. The unpickled object must be
            a mapping that has the byte-string keys ``b'data'`` and
            ``b'labels'``.

    Returns:
        A ``(data, labels)`` tuple holding the values stored under
        ``b'data'`` and ``b'labels'``, exactly as they were pickled.

    Raises:
        OSError: If ``file`` cannot be opened.
        KeyError: If one of the expected keys is missing.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising.
    # Only call this on batch files that come from a trusted source.
    with open(file, 'rb') as fo:
        # encoding='bytes' makes 8-bit strings pickled by Python 2 load as
        # bytes, which is why the keys below are byte strings.
        batch = pickle.load(fo, encoding='bytes')
    data_X = batch[b'data']
    labels = batch[b'labels']
    print("Loading %s : %d." % (file, len(data_X)))
    return data_X, labels
def load_data(files, data_dir, label_count):
    """Load several batch files and merge them into image and label arrays.

    Args:
        files: Non-empty sequence of batch file names located in
            ``data_dir``.
        data_dir: Directory that contains the batch files.
        label_count: Number of classes. Currently unused (labels are returned
            as stored, not one-hot encoded); kept so existing callers keep
            working.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is the concatenation of all
        batches, reshaped to ``(-1, img_channels, img_size, img_size)`` and
        then transposed to channels-last order
        ``(N, img_size, img_size, img_channels)``. ``labels`` is always a
        NumPy array, regardless of how many files were loaded.

    Raises:
        ValueError: If ``files`` is empty.
    """
    if not files:
        raise ValueError("load_data() needs at least one batch file")

    batches = [unpickle(os.path.join(data_dir, name)) for name in files]

    # Concatenate once instead of np.append per file, which re-copies the
    # whole accumulated array on every iteration.
    data = np.concatenate(
        [np.asarray(samples) for samples, _ in batches], axis=0)
    labels = np.concatenate(
        [np.asarray(batch_labels) for _, batch_labels in batches], axis=0)

    # Flat rows -> (N, C, H, W) -> channels_last (N, H, W, C).
    data = data.reshape([-1, img_channels, img_size, img_size])
    data = data.transpose([0, 2, 3, 1])
    return data, labels
def get_data():
    """Load the train and test splits from the local CIFAR-10 batch folder.

    Reads ``data_batch_1`` .. ``data_batch_5`` as the training set and
    ``test_batch`` as the test set from ``./Datasets/cifar-10-batches-py``
    (relative to the current working directory), prints the resulting shapes
    and shuffles the training set.

    Returns:
        A ``(train_data, train_labels, test_data, test_labels)`` tuple. The
        image arrays are whatever ``load_data`` returns; both label
        containers are NumPy arrays. Training images and labels are shuffled
        with the same random permutation.
    """
    # Built with os.path.join: the previous literal
    # '.\Datasets\cifar-10-batches-py' relied on invalid escape sequences
    # (\D, \c) and only resolved on Windows.
    data_dir = os.path.join('.', 'Datasets', 'cifar-10-batches-py')

    # NOTE: batches.meta (names of the labels, e.g. "truck", "plane") is not
    # loaded; only the numeric labels are used.
    n_labels = classes

    train_files = ['data_batch_%d' % d for d in range(1, 6)]
    train_data, train_labels = load_data(train_files, data_dir, n_labels)
    test_data, test_labels = load_data(['test_batch'], data_dir, n_labels)

    # Make sure both label containers are arrays so they can be indexed with
    # a permutation (train) and are type-consistent with each other (test).
    train_labels = np.asarray(train_labels)
    test_labels = np.asarray(test_labels)

    print("Train data:", np.shape(train_data), np.shape(train_labels))
    print("Test data :", np.shape(test_data), np.shape(test_labels))

    # Shuffle images and labels with one shared permutation.
    ind = np.random.permutation(len(train_data))
    train_data = train_data[ind]
    train_labels = train_labels[ind]

    return train_data, train_labels, test_data, test_labels
def normalize(train_X, test_X):
    """Scale and standardise both splits using training-set statistics only.

    Both arrays are cast to float32, divided by the maximum value of the
    training set, then shifted and scaled by the mean and standard deviation
    of the scaled training set.

    The test set is deliberately scaled by the *training* maximum (not its
    own), so that both splits go through exactly the same transformation.

    Args:
        train_X: Training samples (NumPy array).
        test_X: Test samples (NumPy array).

    Returns:
        A ``(train_norm, test_norm)`` tuple of float32 arrays with the same
        shapes as the inputs.
    """
    train_X = train_X.astype('float32')
    test_X = test_X.astype('float32')

    # Compute the scaled training set once and reuse it for all statistics.
    scale = np.max(train_X)
    train_scaled = train_X / scale
    mean_X = np.mean(train_scaled)
    std_X = np.std(train_scaled)

    train_norm = (train_scaled - mean_X) / std_X
    test_norm = (test_X / scale - mean_X) / std_X
    return train_norm, test_norm
# =============================================================================== #
# Functions for data augmentation (not really needed for a comparison study)
# =============================================================================== #
def _random_crop(batch, crop_shape, padding=None):
oshape = np.shape(batch[0])
if padding:
oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding)
new_batch = []
npad = ((padding, padding), (padding, padding), (0, 0))
for i in range(len(batch)):
new_batch.append(batch[i])
if padding:
new_batch[i] = np.lib.pad(batch[i], pad_width=npad,
mode='constant', constant_values=0)
nh = random.randint(0, oshape[0] - crop_shape[0])
nw = random.randint(0, oshape[1] - crop_shape[1])
new_batch[i] = new_batch[i][nh:nh + crop_shape[0],
nw:nw + crop_shape[1]]
return new_batch
def _random_flip_leftright(batch):
for i in range(len(batch)):
if bool(random.getrandbits(1)):
batch[i] = np.fliplr(batch[i])
return batch
def data_augmentation(batch, crop_shape=None, padding=4):
    """Augment a batch with a random horizontal flip and a random crop.

    Args:
        batch: Batch of images to augment.
        crop_shape: ``(height, width)`` of the crop window. Defaults to
            ``[img_size, img_size]`` so the output keeps the dataset's image
            size.
        padding: Number of zero pixels added around each image before
            cropping. Defaults to 4.

    Returns:
        A list of augmented images, as produced by ``_random_crop``.
    """
    if crop_shape is None:
        # Resolved at call time so it follows the module-level image size.
        crop_shape = [img_size, img_size]
    batch = _random_flip_leftright(batch)
    return _random_crop(batch, crop_shape, padding)