
I want to load my TFLite exported Yolov5s model into my object detection android app. I followed this tutorial: https://www.youtube.com/watch?v=zs43IrWTzB0

However, my TFLite Yolov5 model outputs an array of shape [1, 25200, 9].

The expected output signature, however, consists of four arrays: detection_boxes, detection_classes, detection_scores, and num_detections, according to https://www.tensorflow.org/lite/examples/object_detection/overview#output_signature.
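In other words, each of the 25200 rows in that output appears to be one candidate box. Assuming the usual YOLOv5 export layout, the 9 columns would be 4 box coordinates, 1 objectness score, and 4 class scores, indexed out of the flat array like this:

// Assumed YOLOv5 row layout for the [1, 25200, 9] output:
// [xCenter, yCenter, width, height, objectness, 4 class scores]
val data = outputFeature0.floatArray  // 25200 * 9 floats, flattened
for (i in 0 until 25200) {
    val offset = i * 9
    val objectness = data[offset + 4]
    // data[offset + 5] .. data[offset + 8] hold the per-class scores
}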

How should I modify my code to make it loadable in this app?

This is the sample code my TFLite model provides:

val model = BestFp16.newInstance(context)

// Creates inputs for reference.
val inputFeature0 = TensorBuffer.createFixedSize(intArrayOf(1, 640, 640, 3), DataType.FLOAT32)
inputFeature0.loadBuffer(byteBuffer)

// Runs model inference and gets result.
val outputs = model.process(inputFeature0)
val outputFeature0 = outputs.outputFeature0AsTensorBuffer

// Releases model resources if no longer used.
model.close()
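One thing worth noting about this snippet: it loads byteBuffer into the input tensor as-is, and the ImageProcessor in the activity below only resizes. A YOLOv5 float export typically expects pixel values scaled to [0, 1], so the processor may also need a NormalizeOp from the support library (a sketch, assuming the op's usual (value - mean) / stddev form):

import org.tensorflow.lite.support.common.ops.NormalizeOp

val imageProcessor = ImageProcessor.Builder()
    .add(ResizeOp(640, 640, ResizeOp.ResizeMethod.BILINEAR))
    .add(NormalizeOp(0f, 255f)) // (pixel - 0) / 255 maps 0..255 to 0..1
    .build()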

This is my MainActivity.kt (the commented part is where I tried to draw the bounding boxes, but it ended up drawing a static border in the top-left corner of the screen, and the app crashes after a few seconds):

package com.example.sightfulkotlin

import android.annotation.SuppressLint
import android.content.Context
import android.content.pm.PackageManager
import android.graphics.*
import android.hardware.camera2.CameraCaptureSession
import android.hardware.camera2.CameraDevice
import android.hardware.camera2.CameraManager
import android.os.Bundle
import android.os.Handler
import android.os.HandlerThread
import android.view.Surface
import android.view.TextureView
import android.widget.ImageView
import androidx.appcompat.app.AppCompatActivity
import androidx.core.content.ContextCompat
import com.example.sightfulkotlin.ml.BestFp16
import org.tensorflow.lite.DataType
import org.tensorflow.lite.support.common.FileUtil
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import org.tensorflow.lite.support.tensorbuffer.TensorBuffer



class MainActivity : AppCompatActivity() {

    var colors = listOf(
        Color.BLUE, Color.GREEN, Color.RED, Color.CYAN, Color.GRAY, Color.BLACK, Color.DKGRAY, Color.MAGENTA, Color.YELLOW, Color.LTGRAY, Color.WHITE)
    val paint = Paint()
    private lateinit var labels:List<String>
    lateinit var bitmap: Bitmap
    lateinit var imageView: ImageView
    lateinit var cameraDevice: CameraDevice
    lateinit var handler: Handler
    private lateinit var cameraManager: CameraManager
    lateinit var textureView: TextureView
    lateinit var model: BestFp16

    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        setContentView(R.layout.activity_main)

        getPermission()

        labels = FileUtil.loadLabels(this, "labels.txt")
        model = BestFp16.newInstance(this)

        val imageProcessor = ImageProcessor.Builder().add(ResizeOp(640, 640, ResizeOp.ResizeMethod.BILINEAR)).build()

        val handlerThread = HandlerThread("videoThread")
        handlerThread.start()
        handler = Handler(handlerThread.looper)

        paint.color = Color.GREEN

        imageView = findViewById(R.id.imageView)
        textureView = findViewById(R.id.textureView)
        textureView.surfaceTextureListener = object: TextureView.SurfaceTextureListener
        {
            override fun onSurfaceTextureAvailable(p0: SurfaceTexture, p1: Int, p2: Int) {
                openCamera()
            }

            override fun onSurfaceTextureSizeChanged(p0: SurfaceTexture, p1: Int, p2: Int) {
            }

            override fun onSurfaceTextureDestroyed(p0: SurfaceTexture): Boolean {
                return false
            }

            override fun onSurfaceTextureUpdated(p0: SurfaceTexture) {
                bitmap = textureView.bitmap!!

                var tensorImage = TensorImage(DataType.FLOAT32)
                tensorImage.load(bitmap)
                tensorImage = imageProcessor.process(tensorImage)

                val inputFeature0 = TensorBuffer.createFixedSize(intArrayOf(1, 640, 640, 3), DataType.FLOAT32)
                inputFeature0.loadBuffer(tensorImage.buffer)

                val outputs = model.process(inputFeature0)
                val outputFeature0 = outputs.outputFeature0AsTensorBuffer

                val mutable = bitmap.copy(Bitmap.Config.ARGB_8888, true)
                val canvas = Canvas(mutable)

                val h = bitmap.height
                val w = bitmap.width
                paint.textSize = h/15f
                paint.strokeWidth = h/85f

                /* Attempted box drawing; it drew one static box because every
                   loop iteration read the same first row. Reading each row at
                   its own offset (i * 9) fixes that, though the boxes are
                   still unfiltered by confidence:

                val data = outputFeature0.floatArray
                val numDetections = outputFeature0.shape[1]

                for (i in 0 until numDetections) {
                    val offset = i * 9
                    val xCenter = data[offset]
                    val yCenter = data[offset + 1]
                    val width = data[offset + 2]
                    val height = data[offset + 3]

                    val left = (xCenter - width/2) * w
                    val top = (yCenter - height/2) * h
                    val right = (xCenter + width/2) * w
                    val bottom = (yCenter + height/2) * h

                    canvas.drawRect(left, top, right, bottom, paint)
                }*/

                imageView.setImageBitmap(mutable)

            }
        }

        cameraManager = getSystemService(Context.CAMERA_SERVICE) as CameraManager
    }

    override fun onDestroy() {
        super.onDestroy()
        model.close()
    }

    @SuppressLint("MissingPermission")
    fun openCamera()
    {
        cameraManager.openCamera(cameraManager.cameraIdList[0], object: CameraDevice.StateCallback(){
            @SuppressLint("MissingPermission")
            override fun onOpened(p0: CameraDevice) {
                cameraDevice = p0

                val surfaceTexture = textureView.surfaceTexture
                val surface = Surface(surfaceTexture)
                val captureRequest = cameraDevice.createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW)
                captureRequest.addTarget(surface)

                cameraDevice.createCaptureSession(listOf(surface), object: CameraCaptureSession.StateCallback(){
                    override fun onConfigured(p0: CameraCaptureSession) {
                        p0.setRepeatingRequest(captureRequest.build(), null, null)
                    }

                    override fun onConfigureFailed(p0: CameraCaptureSession) {
                    }
                }, handler)
            }

            override fun onDisconnected(p0: CameraDevice) {
            }

            @SuppressLint("MissingPermission")
            override fun onError(p0: CameraDevice, p1: Int) {
            }
        }, handler)
    }

    fun getPermission()
    {
        if (ContextCompat.checkSelfPermission(this, android.Manifest.permission.CAMERA) != PackageManager.PERMISSION_GRANTED)
        {
            requestPermissions(arrayOf(android.Manifest.permission.CAMERA), 101)
        }
    }

    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (grantResults.isEmpty() || grantResults[0] != PackageManager.PERMISSION_GRANTED)
        {
            getPermission()
        }
    }
}

1 Answer


Your model should contain metadata with the class information. The current Ultralytics YOLOv5 GitHub repo does not support exporting an object detection model with that metadata attached for later use on Android.

The reason is that exported YOLOv5 models generally concatenate all outputs into a single tensor. TFLite models are not exported with NMS; only TF.js and pipelined CoreML exports contain NMS. This information is taken from this article, which also proposes a solution. You can try that option, but it didn't work for me. Other possible solutions:

  1. You can also skip adding metadata and decode the output tensor yourself (see the sketch after this list). The code is here.
  2. Rewrite the code in Java, where it is possible to load the model without metadata.
  3. Train another model that supports metadata recording.
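To give an idea of option 1, here is a rough Kotlin sketch of decoding the raw [1, 25200, 9] tensor by hand, including a simple greedy NMS since the TFLite export does not ship with one. It assumes the standard YOLOv5 row layout (x, y, w, h, objectness, then per-class scores) with coordinates normalized to [0, 1]; the Detection class, the thresholds, and the helper names are mine, not from any library:

import android.graphics.RectF

data class Detection(val rect: RectF, val classId: Int, val score: Float)

// Decode the flat [1, 25200, 9] output into thresholded detections.
fun decode(output: FloatArray, numClasses: Int, confThreshold: Float = 0.25f): List<Detection> {
    val stride = 5 + numClasses
    val candidates = mutableListOf<Detection>()
    for (i in 0 until output.size / stride) {
        val off = i * stride
        val objectness = output[off + 4]
        if (objectness < confThreshold) continue
        // Pick the best class for this row.
        var bestClass = 0
        var bestScore = 0f
        for (c in 0 until numClasses) {
            if (output[off + 5 + c] > bestScore) {
                bestScore = output[off + 5 + c]
                bestClass = c
            }
        }
        val score = objectness * bestScore
        if (score < confThreshold) continue
        // Convert center/size to corners; coordinates stay normalized to 0..1.
        val cx = output[off]; val cy = output[off + 1]
        val w = output[off + 2]; val h = output[off + 3]
        candidates += Detection(RectF(cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2), bestClass, score)
    }
    return nms(candidates, 0.45f)
}

// Greedy per-class non-maximum suppression.
fun nms(dets: List<Detection>, iouThreshold: Float): List<Detection> {
    val pending = dets.sortedByDescending { it.score }.toMutableList()
    val kept = mutableListOf<Detection>()
    while (pending.isNotEmpty()) {
        val best = pending.removeAt(0)
        kept += best
        pending.removeAll { it.classId == best.classId && iou(it.rect, best.rect) > iouThreshold }
    }
    return kept
}

fun iou(a: RectF, b: RectF): Float {
    val ix = maxOf(0f, minOf(a.right, b.right) - maxOf(a.left, b.left))
    val iy = maxOf(0f, minOf(a.bottom, b.bottom) - maxOf(a.top, b.top))
    val inter = ix * iy
    val union = a.width() * a.height() + b.width() * b.height() - inter
    return if (union <= 0f) 0f else inter / union
}

Each kept rect can then be scaled by the preview bitmap's width and height before drawing, which is roughly what the commented-out code in the question attempts.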