I'm running exactly the same MobileNetV3-Cityscapes model with TensorFlow Lite and the same input image from both C++ and Python, but I get slightly different results. The difference shows up right after the decimal point. The code runs on a Raspberry Pi 4 (AArch64).
Output for Python:
(1, 1025, 2049, 3)
input_data: [[[[-0.7411765 -0.73333335 -0.85882354]
[-0.70980394 -0.70980394 -0.827451 ]
[-0.6392157 -0.64705884 -0.77254903]
...
[-0.78039217 -0.79607844 -0.9529412 ]
[-0.81960785 -0.8352941 -0.9843137 ]
[-0.8352941 -0.8509804 -1. ]]
[[-0.75686276 -0.7647059 -0.8901961 ]
[-0.73333335 -0.7411765 -0.85882354]
[-0.6784314 -0.69411767 -0.8117647 ]
...
[-0.7176471 -0.7411765 -0.90588236]
[-0.7254902 -0.7490196 -0.9137255 ]
[-0.7254902 -0.7490196 -0.9137255 ]]
[[-0.77254903 -0.7882353 -0.90588236]
[-0.75686276 -0.77254903 -0.8901961 ]
[-0.7176471 -0.7411765 -0.85882354]
...
[-0.6392157 -0.6862745 -0.85882354]
[-0.60784316 -0.64705884 -0.827451 ]
[-0.5921569 -0.6392157 -0.8117647 ]]
...
[[-0.4823529 -0.52156866 -0.46666664]
[-0.47450978 -0.5137255 -0.4588235 ]
[-0.46666664 -0.5058824 -0.45098037]
...
[-0.8509804 -0.8509804 -0.8352941 ]
[-0.8509804 -0.8509804 -0.8352941 ]
[-0.84313726 -0.84313726 -0.827451 ]]
[[-0.49019605 -0.5294118 -0.47450978]
[-0.4823529 -0.52156866 -0.46666664]
[-0.46666664 -0.5058824 -0.45098037]
...
[-0.827451 -0.827451 -0.8117647 ]
[-0.8117647 -0.8117647 -0.79607844]
[-0.8039216 -0.8039216 -0.7882353 ]]
[[-0.49019605 -0.5294118 -0.47450978]
[-0.4823529 -0.52156866 -0.46666664]
[-0.46666664 -0.5058824 -0.45098037]
...
[-0.8117647 -0.8117647 -0.79607844]
[-0.7882353 -0.7882353 -0.77254903]
[-0.77254903 -0.77254903 -0.75686276]]]]
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
(1, 1025, 2049, 19)
raw_prediction: [-1.9500647 -4.2847333 8.452963 -3.321663 -0.6902989 3.3582914
0.7511586 0.71168745 8.266863 -0.5422214 -0.25331986 -0.7904396
-2.3567524 -1.5183947 -1.4971358 2.4561167 0.4811486 -3.4283543
-3.6689208 ]
Output for C++:
input_data:
[ -0.741176, -0.733333, -0.858824, -0.709804, -0.709804, -0.827451, -0.639216, -0.647059, -0.772549, -0.592157, -0.615686, -0.72549, -0.560784, -0.607843, -0.709804, -0.537255, -0.6, -0.701961, -0.537255, -0.607843, -0.701961, -0.545098, -0.631373, -0.717647, -0.6, -0.694118, -0.772549, -0.670588, -0.772549, -0.827451, -0.780392, -0.87451, -0.913725, -0.866667, -0.952941, -0.976471, -0.866667, -0.945098, -0.952941, -0.858824, -0.929412, -0.921569, -0.811765, -0.866667, -0.858824, -0.764706, -0.811765, -0.819608, -0.780392, -0.827451, -0.843137, -0.796078, -0.827451, -0.866667, -0.741176, -0.780392, -0.85098, -0.686275, -0.733333, -0.827451, -0.717647, -0.764706, -0.87451, -0.741176, -0.796078, -0.921569, -0.733333, -0.796078, -0.913725, -0.717647, -0.788235, -0.905882, -0.756863, -0.827451, -0.913725, -0.803922, -0.87451, -0.929412, -0.843137, -0.898039, -0.952941, -0.882353, -0.929412, -0.976471, -0.905882, -0.929412, -0.984314, -0.921569, -0.921569, -0.984314, -0.827451, -0.827451, -0.913725, -0.717647, -0.694118, -0.819608, -0.623529, -0.607843, -0.772549, -0.537255, -0.537255, ...
raw_prediction:
[ -1.85958, -4.14567, 8.41367, -3.15393, -0.717632, 3.51989, 0.824568, 0.803327, 8.24112, -0.525274, -0.253021, -0.902755, -2.32101, -1.61767, -1.41086, 2.22645, 0.285652, -3.46446, -3.75752, -1.91153, -4.14749, 8.39831, -3.1724, -0.785404, 3.31039, 0.669547, 0.780974, 8.2189, -0.540953, -0.240978, -0.692887, -2.12443, -1.60665, -1.43729, 2.24428, 0.29506, -3.38252, -3.68832, -1.96347, -4.14931, 8.38296, -3.19087, -0.853175, 3.10088, 0.514526, 0.758622, 8.19668, -0.556632, -0.228935, -0.483018, -1.92786, -1.59563, -1.46371, 2.26211, 0.304468, -3.30057, -3.61912, -2.01542, -4.15112, 8.3676, -3.20933, -0.920947, 2.89137, 0.359505, 0.736269, 8.17446, -0.572311, -0.216892, -0.27315, -1.73128, -1.5846, -1.49013, 2.27994, 0.313876, -3.21863, -3.54992, -2.06736, -4.15294, 8.35224, -3.2278, -0.988718, 2.68186, 0.204483, 0.713917, 8.15224, -0.58799, -0.204849, -0.063281, -1.5347, -1.57358, -1.51656, 2.29776, 0.323284, -3.13669, -3.48072, -2.11931, -4.15476, 8.33689, -3.24627, -1.05649, 2.47236, ...
As you can see, the input data is the same for both C++ and Python; the only difference is in the rounding of the last printed digit, and the values inspected under the debugger are identical.
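(Side note: to make the comparison independent of the default cout rounding, the C++ print could be switched to full float precision with std::setprecision. A minimal sketch, not part of the code below; printFullPrecision is just an illustrative helper:)

#include <iomanip>
#include <iostream>
#include <limits>
#include <vector>

// Illustrative helper: print the first 'count' values with max_digits10
// precision (9 significant digits for float), enough to round-trip a float
// exactly, so the comparison is not limited by the default cout rounding.
static void printFullPrecision(const std::vector<float> &v, size_t count)
{
    std::cout << std::setprecision(std::numeric_limits<float>::max_digits10);
    for (size_t i = 0; i < count && i < v.size(); ++i)
        std::cout << v[i] << ((i + 1 < count && i + 1 < v.size()) ? ", " : "\n");
}

On the Python side, np.set_printoptions(precision=9) would give a comparable number of digits.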
The real difference shows up in the output data. Here are the first three values for both languages:
|C++|Python|
|---|------|
|-1.85958|-1.9500647|
|-4.14567|-4.2847333|
|8.41367|8.452963|
At first sight it looks like some kind of a float/double issue, but the code uses float (Float32) on both sides, and the differences above (roughly 0.04 to 0.14) are far larger than anything Float32 rounding could explain. I have checked the actual values under the C++ debugger to be sure that what I see is what I get :)
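To additionally rule out a dtype mismatch on the TFLite side, the tensor types can be checked directly on the interpreter. A minimal sketch, assuming the standard TensorFlow Lite C++ API; tensorsAreFloat32 is an illustrative helper and not part of my code:

#include "tensorflow/lite/interpreter.h"

// Illustrative helper: check that the first input and output tensors are
// Float32, matching what Python's get_input_details()/get_output_details()
// report for this model.
static bool tensorsAreFloat32(const tflite::Interpreter &interpreter)
{
    const TfLiteTensor *in = interpreter.tensor(interpreter.inputs()[0]);
    const TfLiteTensor *out = interpreter.tensor(interpreter.outputs()[0]);
    return in->type == kTfLiteFloat32 && out->type == kTfLiteFloat32;
}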
I would be happy if someone could shed some light on this. Thanks! :)
The code for Python:
import cv2
import numpy as np
import tensorflow as tf

if __name__ == '__main__':
    image = cv2.imread('../input_videos/photo2.jpeg')
    model_path = '../models/lite-model_deeplabv3-mobilenetv3-cityscapes_1_default_2.tflite'

    # preprocess data
    frame = image
    input_size = (2049, 1025)
    resized_frame = cv2.resize(frame, input_size)
    resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)
    frame_for_prediction = np.asarray(resized_frame).astype(np.float32)
    frame_for_prediction = np.expand_dims(frame_for_prediction, 0)
    frame_for_prediction = frame_for_prediction / 127.5 - 1
    print(frame_for_prediction.shape)
    print(f"input_data: {frame_for_prediction}")

    # load model
    interpreter = tf.lite.Interpreter(model_path=model_path)
    input_details = interpreter.get_input_details()
    interpreter.allocate_tensors()
    interpreter.set_tensor(input_details[0]['index'], frame_for_prediction)
    interpreter.invoke()

    raw_prediction = interpreter.tensor(interpreter.get_output_details()[0]['index'])()
    print(raw_prediction.shape)
    print(f"raw_prediction: {raw_prediction[0, 0, 0, :]}")
The code for C++:
#include <iostream>
#include "opencv2/opencv.hpp"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model_builder.h"
#include "tensorflow/lite/interpreter_builder.h"
#include "tensorflow/lite/core/shims/cc/kernels/register.h"

using namespace std;
using namespace tflite;

template<typename ElemType>
inline void printVecElems(const std::vector<ElemType> &mat, size_t elemCnt, const char *matName)
{
    size_t cnt{};
    bool brk = false;
    std::cout << matName << ":" << std::endl;
    std::cout << "[ ";
    for (uint32_t row = 0; row < mat.size(); ++row)
    {
        std::cout << +mat.at(row);
        if (row + 1 < mat.size())
            std::cout << ", ";
        ++cnt;
        if (cnt > elemCnt)
        {
            brk = true;
            break;
        }
    }
    if (brk)
        std::cout << "..." << std::endl;
    else
        std::cout << " ]" << std::endl;
}

static int runTfTest()
{
    cv::Mat image = cv::imread("~/input_videos/photo2.jpeg");
    if (image.empty())
    {
        cout << "Can not read input file" << endl;
        return -1;
    }

    cv::Mat dst;
    cv::resize(image, dst, cv::Size2i(2049, 1025), 0, 0, cv::INTER_LINEAR);
    if (dst.empty())
        return -1;
    cv::cvtColor(dst, dst, cv::COLOR_BGR2RGB);

    cv::Mat flat = dst.reshape(1, dst.total() * dst.channels());
    std::vector<float> frameForPrediction = dst.isContinuous() ? flat : flat.clone();
    for (auto &pt : frameForPrediction)
        pt = (pt / 127.5) - 1.0;

    printVecElems<float>(frameForPrediction, 100, "input_data");

    // Create model from file. Note that the model instance must outlive the
    // interpreter instance.
    auto model = tflite::FlatBufferModel::BuildFromFile("~/models/lite-model_deeplabv3-mobilenetv3-cityscapes_1_default_2.tflite");
    if (model == nullptr)
        return -1;

    // Create an Interpreter with an InterpreterBuilder.
    std::unique_ptr<Interpreter> interpreter;
    tflite::ops::builtin::BuiltinOpResolver resolver;
    if (InterpreterBuilder(*model, resolver)(&interpreter) != kTfLiteOk)
        return -1;
    if (interpreter->AllocateTensors() != kTfLiteOk)
        return -1;

    auto inputData = interpreter->typed_input_tensor<float>(0);
    for (uint32_t i = 0; i < frameForPrediction.size(); ++i)
    {
        inputData[i] = frameForPrediction[i]; // TODO: Pass input data to input tensor without copy
    }

    if (interpreter->Invoke() != kTfLiteOk)
        return -1;

    auto outputData = interpreter->typed_output_tensor<float>(0);
    const std::vector<int> &outputs = interpreter.get()->outputs();
    TfLiteTensor *outDetails = interpreter.get()->tensor(outputs[0]);
    std::vector<float> rawPrediction;
    for (uint32_t i = 0; i < outDetails->bytes / sizeof(float); ++i)
    {
        rawPrediction.push_back(outputData[i]); // TODO: Pass output tensor to output data without copy
    }

    printVecElems<float>(rawPrediction, 100, "raw_prediction");
    return 0;
}

int main()
{
    int ret {};
    ret = runTfTest();
    if (ret != 0)
        cout << "Run failed." << endl;
    return ret;
}
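One more thing I notice in the logs: the Python run prints "INFO: Created TensorFlow Lite XNNPACK delegate for CPU.", while my C++ code does not attach any delegate explicitly. In case that is relevant, below is a rough sketch of how the XNNPACK delegate could be attached on the C++ side, assuming TensorFlow Lite was built with XNNPACK support; applyXnnpack is an illustrative helper, not something from my code:

#include <iostream>
#include "tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h"
#include "tensorflow/lite/interpreter.h"

// Illustrative sketch: attach the XNNPACK delegate to an already-built
// interpreter, mirroring what the Python runtime reports in its INFO log.
// The returned delegate must outlive the interpreter; a real program would
// keep the pointer and call TfLiteXNNPackDelegateDelete() after the
// interpreter is destroyed.
static TfLiteDelegate* applyXnnpack(tflite::Interpreter &interpreter, int numThreads)
{
    TfLiteXNNPackDelegateOptions options = TfLiteXNNPackDelegateOptionsDefault();
    options.num_threads = numThreads;
    TfLiteDelegate *delegate = TfLiteXNNPackDelegateCreate(&options);
    if (delegate == nullptr)
        return nullptr;
    if (interpreter.ModifyGraphWithDelegate(delegate) != kTfLiteOk)
    {
        std::cout << "Failed to apply XNNPACK delegate" << std::endl;
        TfLiteXNNPackDelegateDelete(delegate);
        return nullptr;
    }
    return delegate;
}

This would typically be called right after building the interpreter, before AllocateTensors().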