Digging through this and that... even the original source code, but I still can't find the answer I'm looking for.
[Link : https://www.tensorflow.org/model_optimization/guide/quantization/training]
[Link : https://www.tensorflow.org/model_optimization/guide/quantization/training_example]
[Link : https://github.com/tensorflow/.../lite/g3doc/performance/post_training_quantization.md]
[Link : https://github.com/tensorflow/.../lite/g3doc/performance/quantization_spec.md]
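The first two links describe quantization-aware training with the tensorflow_model_optimization package; the last two describe post-training quantization, which is what the test code below exercises. For reference, a minimal quantization-aware-training sketch of my own (assuming tensorflow_model_optimization is installed; not taken from the TensorFlow sources below) looks roughly like this:

import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Plain float Keras model, same kind of MNIST toy model as the excerpt below.
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(28, 28)),
    tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
    tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(10),
])

# Wrap the model with fake-quantization nodes so quantization effects are
# simulated while fine-tuning.
q_aware_model = tfmot.quantization.keras.quantize_model(model)
q_aware_model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"])
# ... fine-tune q_aware_model on data, then convert it with TFLiteConverter
# just like the post-training examples below.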
util_test.py
def _generate_integer_tflite_model(quantization_type=dtypes.int8):
  """Define an integer post-training quantized tflite model."""
  # Load MNIST dataset
  n = 10  # Number of samples
  (train_images, train_labels), (test_images, test_labels) = \
      tf.keras.datasets.mnist.load_data()
  train_images, train_labels, test_images, test_labels = \
      train_images[:n], train_labels[:n], test_images[:n], test_labels[:n]

  # Normalize the input image so that each pixel value is between 0 to 1.
  train_images = train_images / 255.0
  test_images = test_images / 255.0

  # Define TF model
  model = tf.keras.Sequential([
      tf.keras.layers.InputLayer(input_shape=(28, 28)),
      tf.keras.layers.Reshape(target_shape=(28, 28, 1)),
      tf.keras.layers.Conv2D(filters=12, kernel_size=(3, 3), activation="relu"),
      tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(10)
  ])

  # Train
  model.compile(
      optimizer="adam",
      loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
      metrics=["accuracy"])
  model.fit(
      train_images,
      train_labels,
      epochs=1,
      validation_split=0.1,
  )

  # Convert TF Model to an Integer Quantized TFLite Model
  converter = tf.lite.TFLiteConverter.from_keras_model(model)
  converter.optimizations = {tf.lite.Optimize.DEFAULT}

  def representative_dataset_gen():
    for _ in range(2):
      yield [
          np.random.uniform(low=0, high=1, size=(1, 28, 28)).astype(np.float32)
      ]

  converter.representative_dataset = representative_dataset_gen
  if quantization_type == dtypes.int8:
    converter.target_spec.supported_ops = {tf.lite.OpsSet.TFLITE_BUILTINS_INT8}
  else:
    converter.target_spec.supported_ops = {
        tf.lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8
    }
  tflite_model = converter.convert()

  return tflite_model
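A rough usage sketch of my own (not part of util_test.py): run the returned model through the TFLite Interpreter to see that, without setting inference_input_type/inference_output_type, the input and output tensors stay float32 even though the weights and activations inside are quantized.

import numpy as np
import tensorflow as tf

tflite_model = _generate_integer_tflite_model()  # helper from the excerpt above
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]
print(input_details['dtype'], output_details['dtype'])  # expected: float32 float32

# Feed one dummy image and read the logits back.
interpreter.set_tensor(
    input_details['index'],
    np.random.uniform(0, 1, size=(1, 28, 28)).astype(np.float32))
interpreter.invoke()
print(interpreter.get_tensor(output_details['index']))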
lite_v2_test.py
def _getIntegerQuantizeModel(self):
  np.random.seed(0)

  root = tracking.AutoTrackable()

  @tf.function(
      input_signature=[tf.TensorSpec(shape=[1, 5, 5, 3], dtype=tf.float32)])
  def func(inp):
    conv = tf.nn.conv2d(
        inp, tf.ones([3, 3, 3, 16]), strides=[1, 1, 1, 1], padding='SAME')
    output = tf.nn.relu(conv, name='output')
    return output

  def calibration_gen():
    for _ in range(5):
      yield [np.random.uniform(-1, 1, size=(1, 5, 5, 3)).astype(np.float32)]

  root.f = func
  to_save = root.f.get_concrete_function()
  return (to_save, calibration_gen)
def testInvalidIntegerQuantization(self, is_int16_quantize,
                                   inference_input_output_type):
  func, calibration_gen = self._getIntegerQuantizeModel()

  # Convert quantized model.
  quantized_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  quantized_converter.optimizations = [lite.Optimize.DEFAULT]
  quantized_converter.representative_dataset = calibration_gen
  if is_int16_quantize:
    quantized_converter.target_spec.supported_ops = [
        lite.OpsSet.EXPERIMENTAL_TFLITE_BUILTINS_ACTIVATIONS_INT16_WEIGHTS_INT8,
        lite.OpsSet.TFLITE_BUILTINS
    ]
  with self.assertRaises(ValueError) as error:
    quantized_converter.inference_input_type = dtypes.int8
    quantized_converter.inference_output_type = dtypes.int8
    quantized_converter.convert()
  self.assertEqual(
      'The inference_input_type and inference_output_type '
      "must be in ['tf.float32', 'tf.int16'].", str(error.exception))
def testCalibrateAndQuantizeBuiltinInt16(self):
  func, calibration_gen = self._getIntegerQuantizeModel()

  # Convert float model.
  float_converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  float_tflite_model = float_converter.convert()
  self.assertIsNotNone(float_tflite_model)

  converter = lite.TFLiteConverterV2.from_concrete_functions([func])
  # TODO(b/156309549): We should add INT16 to the builtin types.
  converter.optimizations = [lite.Optimize.DEFAULT]
  converter.target_spec.supported_ops = [lite.OpsSet.TFLITE_BUILTINS_INT8]
  converter.representative_dataset = calibration_gen
  converter._experimental_calibrate_only = True
  calibrated_tflite = converter.convert()
  quantized_tflite_model = mlir_quantize(
      calibrated_tflite, inference_type=_types_pb2.QUANTIZED_INT16)
  self.assertIsNotNone(quantized_tflite_model)

  # The default input and output types should be float.
  interpreter = Interpreter(model_content=quantized_tflite_model)
  interpreter.allocate_tensors()
  input_details = interpreter.get_input_details()
  self.assertLen(input_details, 1)
  self.assertEqual(np.float32, input_details[0]['dtype'])
  output_details = interpreter.get_output_details()
  self.assertLen(output_details, 1)
  self.assertEqual(np.float32, output_details[0]['dtype'])

  # Ensure that the quantized weights tflite model is smaller.
  self.assertLess(len(quantized_tflite_model), len(float_tflite_model))
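For comparison, the usual public-API way to get a fully integer int8 model, where even the input and output tensors are int8, is sketched below. This is my own sketch based on the post_training_quantization guide linked above (again reusing func and calibration_gen from the excerpt), not something taken from lite_v2_test.py.

converter = tf.lite.TFLiteConverter.from_concrete_functions([func])
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = calibration_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8   # or tf.uint8
converter.inference_output_type = tf.int8  # or tf.uint8
tflite_model = converter.convert()

interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
print(interpreter.get_input_details()[0]['dtype'])   # expected: int8
print(interpreter.get_output_details()[0]['dtype'])  # expected: int8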