"""inference_engine.py

Defines ``InferenceEngine``, a NumPy-based inference engine, together with a
small smoke test that runs when the file is executed as a script.
"""
import time
import numpy as np
from typing import List, Tuple
class InferenceEngine:
    """Synthetic inference engine backed by real NumPy matrix multiplications.

    The "model" is one random ``hidden_size x hidden_size`` float32 matrix that
    is applied ``_NUM_LAYERS`` times, each followed by ``tanh``. The first
    ``num_classes`` activations are turned into a probability-like vector by
    taking absolute values and dividing by their sum.
    """

    # Number of matmul + tanh rounds applied to every input.
    _NUM_LAYERS = 5

    def __init__(self, model_name="resnet50", shard_id=0):
        """Create an engine whose weights are derived from ``shard_id``.

        Args:
            model_name: Label reported by ``get_model_info``; it does not
                influence the computation.
            shard_id: Shard identifier; the weights are seeded with
                ``42 + shard_id`` so equal shard ids give equal weights.
        """
        self.model_name = model_name
        self.shard_id = shard_id
        self.num_classes = 1000
        self.hidden_size = 1024
        # A private RandomState yields the same stream as np.random.seed()
        # followed by np.random.randn(), but leaves NumPy's global generator
        # untouched, so building an engine no longer reseeds the caller's RNG.
        rng = np.random.RandomState(42 + shard_id)
        self.weights = rng.randn(self.hidden_size, self.hidden_size).astype(np.float32)

    def _prepare(self, input_data) -> np.ndarray:
        """Return ``input_data`` as a float32 vector of exactly ``hidden_size``.

        The input is flattened, truncated to ``hidden_size`` values and
        zero-padded on the right when it is shorter.
        """
        flat = np.asarray(input_data, dtype=np.float32).ravel()[: self.hidden_size]
        row = np.zeros(self.hidden_size, dtype=np.float32)
        row[: flat.size] = flat
        return row

    def _forward(self, x: np.ndarray) -> np.ndarray:
        """Apply the matmul + tanh stack to a ``(batch, hidden_size)`` array."""
        for _ in range(self._NUM_LAYERS):
            x = np.tanh(np.matmul(x, self.weights))
        return x

    def _normalize(self, row: np.ndarray) -> np.ndarray:
        """Turn one activation row into scores that sum to 1.

        When every score is zero (for example an empty or all-zero input) a
        plain division would produce NaN, so a uniform distribution is
        returned instead.
        """
        scores = np.abs(row[: self.num_classes])
        total = scores.sum()
        if total == 0:
            if scores.size == 0:
                return scores
            return np.full_like(scores, 1.0 / scores.size)
        return scores / total

    def predict(self, input_data: List[float], input_shape: List[int]) -> Tuple[List[float], int]:
        """Run one input through the engine.

        Args:
            input_data: Input values; flattened, then truncated or zero-padded
                to ``hidden_size``.
            input_shape: Accepted for interface compatibility; not used by the
                computation.

        Returns:
            A tuple ``(scores, inference_time_us)`` where ``scores`` is a list
            of up to ``num_classes`` non-negative floats summing to 1 and
            ``inference_time_us`` is the elapsed time in whole microseconds.
        """
        start_time = time.perf_counter()
        x = self._prepare(input_data).reshape(1, self.hidden_size)
        x = self._forward(x)
        output = self._normalize(x[0]).tolist()
        inference_time_us = int((time.perf_counter() - start_time) * 1_000_000)
        return output, inference_time_us

    def batch_predict(self, inputs: List[List[float]], shapes: List[List[int]]) -> List[Tuple[List[float], int]]:
        """Run several inputs through the engine as one stacked batch.

        Args:
            inputs: One list of values per item; each is prepared exactly as
                in ``predict``.
            shapes: Accepted for interface compatibility; not used by the
                computation.

        Returns:
            One ``(scores, per_item_time_us)`` tuple per input, in input
            order. ``per_item_time_us`` is the time spent preparing the batch
            and running the matmul stack, integer-divided by the batch size.
            An empty ``inputs`` yields an empty list.
        """
        if not inputs:
            return []
        start_time = time.perf_counter()
        batch_size = len(inputs)
        batch = np.stack([self._prepare(item) for item in inputs])
        activations = self._forward(batch)
        total_time_us = int((time.perf_counter() - start_time) * 1_000_000)
        per_item_time = total_time_us // batch_size
        return [(self._normalize(row).tolist(), per_item_time) for row in activations]

    def get_model_info(self):
        """Return the model name, shard id, class count and weight size in MiB."""
        return {
            'model_name': self.model_name,
            'shard_id': self.shard_id,
            'num_classes': self.num_classes,
            'weights_size_mb': self.weights.nbytes / (1024 * 1024)
        }
def main():
    """Smoke-test ``InferenceEngine`` with one single and one batched call.

    Prints the model info, the result of a single prediction and the timing of
    an 8-item batch, including the per-item speed-up relative to the single
    call.
    """
    print("Testing Inference Engine")
    print("=" * 50)
    engine = InferenceEngine(model_name="resnet50", shard_id=0)
    info = engine.get_model_info()
    print(f"Model: {info['model_name']}")
    print(f"Shard: {info['shard_id']}")
    print(f"Classes: {info['num_classes']}")
    print(f"Weights: {info['weights_size_mb']:.2f} MB")
    print()

    print("Single Inference Test:")
    input_data = np.random.rand(224 * 224 * 3).tolist()
    input_shape = [1, 224, 224, 3]
    output, inference_time = engine.predict(input_data, input_shape)
    print(f" Input size: {len(input_data)}")
    print(f" Output size: {len(output)}")
    print(f" Inference time: {inference_time/1000:.2f} ms")
    # Report (class index, score) pairs: the bare sorted scores shown before
    # did not identify which classes they belonged to.
    top5 = sorted(range(len(output)), key=output.__getitem__, reverse=True)[:5]
    print(f" Top-5 classes: {[(idx, output[idx]) for idx in top5]}")
    print()

    print("Batch Inference Test (batch_size=8):")
    batch_inputs = [np.random.rand(224 * 224 * 3).tolist() for _ in range(8)]
    batch_shapes = [[1, 224, 224, 3]] * 8
    # perf_counter is the clock meant for measuring short intervals; the wall
    # clock can jump and may have coarse resolution.
    start = time.perf_counter()
    engine.batch_predict(batch_inputs, batch_shapes)
    batch_time = (time.perf_counter() - start) * 1000
    per_item_ms = batch_time / len(batch_inputs)
    # Guard the ratio so a zero measurement cannot raise ZeroDivisionError.
    speedup = (inference_time / 1000) / per_item_ms if per_item_ms > 0 else float("inf")
    print(f" Batch size: {len(batch_inputs)}")
    print(f" Total time: {batch_time:.2f} ms")
    print(f" Per-item time: {per_item_ms:.2f} ms")
    print(f" Speedup vs single: {speedup:.2f}x")
    print("=" * 50)


if __name__ == "__main__":
    main()