This video shows the steps to create a voice-based image recognition Android App which uses AI capability from Google’s ML kit.
This video refers to the code from the page below:
https://programmerworld.co/android/how-to-create-a-personal-voice-assistant-android-app-to-create-a-text-file-complete-source-code/
I hope you like this video. For any questions, suggestions or appreciation please contact us at: https://programmerworld.co/contact/ or email at: programmerworld1990@gmail.com
Complete source code and other details:
package com.programmerworld.voicebasedimagerecognitionapp;
import static android.Manifest.permission.RECORD_AUDIO;
import androidx.appcompat.app.AppCompatActivity;
import androidx.core.app.ActivityCompat;
import android.content.Intent;
import android.content.pm.PackageManager;
import android.graphics.Bitmap;
import android.graphics.BitmapFactory;
import android.os.Bundle;
import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.speech.tts.TextToSpeech;
import android.view.View;
import android.widget.ImageView;
import android.widget.TextView;
import com.google.android.gms.tasks.OnSuccessListener;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.label.ImageLabel;
import com.google.mlkit.vision.label.ImageLabeler;
import com.google.mlkit.vision.label.ImageLabeling;
import com.google.mlkit.vision.label.defaults.ImageLabelerOptions;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Voice-controlled image recognition demo.
 *
 * Loads two demo images from assets, listens for a spoken command ("first" /
 * "second") via {@link SpeechRecognizer}, labels the corresponding image with
 * ML Kit's on-device image labeler, and speaks the result back with
 * {@link TextToSpeech}.
 */
public class MainActivity extends AppCompatActivity {
    // Request code for the runtime RECORD_AUDIO permission prompt.
    // (The original passed PackageManager.PERMISSION_GRANTED, i.e. 0, which is
    // legal but misleading — that constant is a grant RESULT, not a request code.)
    private static final int PERMISSION_REQUEST_CODE = 1;
    // Delay before listening starts, giving TextToSpeech time to finish the prompt.
    private static final long LISTEN_DELAY_MS = 3000;

    private SpeechRecognizer speechRecognizer;
    private TextToSpeech textToSpeech;
    private TextView textView;
    private Intent intent;
    private ImageView imageView1, imageView2;
    private Bitmap bitmap1, bitmap2;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);

        ActivityCompat.requestPermissions(this,
                new String[]{RECORD_AUDIO},
                PERMISSION_REQUEST_CODE);

        textView = findViewById(R.id.textView);
        imageView1 = findViewById(R.id.imageView1);
        imageView2 = findViewById(R.id.imageView2);

        // Load the two demo images from assets. try-with-resources closes the
        // streams; the original version leaked both InputStreams.
        try (InputStream inputStream1 = getAssets().open("flower1.jpg");
             InputStream inputStream2 = getAssets().open("butterfly.jpg")) {
            bitmap1 = BitmapFactory.decodeStream(inputStream1);
            bitmap2 = BitmapFactory.decodeStream(inputStream2);
            imageView1.setImageBitmap(bitmap1);
            imageView2.setImageBitmap(bitmap2);
        } catch (IOException e) {
            throw new RuntimeException("Failed to load demo images from assets", e);
        }

        // No-op init listener: nothing to configure once the engine is ready.
        textToSpeech = new TextToSpeech(this, status -> {
        });

        intent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        intent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL, RecognizerIntent.LANGUAGE_MODEL_FREE_FORM);

        speechRecognizer = SpeechRecognizer.createSpeechRecognizer(this);
        speechRecognizer.setRecognitionListener(new RecognitionListener() {
            @Override
            public void onReadyForSpeech(Bundle bundle) {
            }

            @Override
            public void onBeginningOfSpeech() {
            }

            @Override
            public void onRmsChanged(float v) {
            }

            @Override
            public void onBufferReceived(byte[] bytes) {
            }

            @Override
            public void onEndOfSpeech() {
            }

            @Override
            public void onError(int i) {
                // Intentionally ignored for this demo; a production app should
                // surface the error code and/or retry.
            }

            @Override
            public void onResults(Bundle bundle) {
                // RESULTS_RECOGNITION is a static constant — access it via the
                // class, not the speechRecognizer instance.
                ArrayList<String> matches = bundle.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
                textView.setText("");
                // Guard against both a null list and an empty one (the original
                // only checked for null before calling get(0)).
                if (matches != null && !matches.isEmpty()) {
                    String string = matches.get(0);
                    textView.setText(string);
                    if (string.contains("first")) {
                        FirstImageRecognition();
                    } else if (string.contains("second")) {
                        SecondImageRecognition();
                    } else {
                        promptAndListen("Sorry, couldn't understand. Please try again.");
                    }
                }
            }

            @Override
            public void onPartialResults(Bundle bundle) {
            }

            @Override
            public void onEvent(int i, Bundle bundle) {
            }
        });
    }

    /**
     * Click handler (wired via android:onClick in the layout): greets the user
     * and starts listening for a command.
     */
    public void buttonStartRecognition(View view) {
        promptAndListen("Please tell me, how can I help you?");
    }

    /**
     * Shows and speaks {@code prompt}, then starts speech recognition after a
     * short delay so the spoken prompt is not captured as input.
     *
     * Uses View.postDelayed instead of the original Thread.sleep(3000), which
     * blocked the UI thread for 3 seconds and risked an ANR.
     */
    private void promptAndListen(String prompt) {
        textView.setText(prompt);
        textToSpeech.speak(prompt, TextToSpeech.QUEUE_FLUSH, null, null);
        textView.postDelayed(() -> speechRecognizer.startListening(intent), LISTEN_DELAY_MS);
    }

    /**
     * Labels {@code bitmap} with ML Kit's default on-device labeler and speaks
     * the result. Shared by the two recognition entry points (the original
     * duplicated this code verbatim).
     *
     * @param bitmap    image to label
     * @param imageName "first" or "second", used in the spoken response
     */
    private void recognizeImage(Bitmap bitmap, String imageName) {
        ImageLabeler imageLabeler = ImageLabeling.getClient(ImageLabelerOptions.DEFAULT_OPTIONS);
        InputImage inputImage = InputImage.fromBitmap(bitmap, 0);
        imageLabeler.process(inputImage).addOnSuccessListener(imageLabels -> {
            if (imageLabels.isEmpty()) {
                // The original indexed the list unconditionally and would crash
                // with IndexOutOfBoundsException when no labels were returned.
                textView.setText("Sorry, I could not recognize the " + imageName + " image.");
                return;
            }
            // The original always read index 1; keep that preference when two
            // or more labels exist, but fall back to the top label otherwise.
            int index = imageLabels.size() > 1 ? 1 : 0;
            String stringImageRecognized =
                    "Sure, the " + imageName + " image is " + imageLabels.get(index).getText() + ".";
            textView.setText(stringImageRecognized);
            textToSpeech.speak(stringImageRecognized, TextToSpeech.QUEUE_FLUSH, null, null);
        });
    }

    private void FirstImageRecognition() {
        recognizeImage(bitmap1, "first");
    }

    private void SecondImageRecognition() {
        recognizeImage(bitmap2, "second");
    }

    @Override
    protected void onDestroy() {
        // Release the system services this Activity owns; the original never
        // destroyed the recognizer or shut down the TTS engine.
        if (speechRecognizer != null) {
            speechRecognizer.destroy();
        }
        if (textToSpeech != null) {
            textToSpeech.stop();
            textToSpeech.shutdown();
        }
        super.onDestroy();
    }
}
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<!-- RECORD_AUDIO is a dangerous permission: declared here AND requested at
     runtime in MainActivity.onCreate(). -->
<uses-permission android:name="android.permission.RECORD_AUDIO"/>
<!-- INTERNET is needed by the online speech recognition service. -->
<uses-permission android:name="android.permission.INTERNET"/>
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.VoiceBasedImageRecognitionApp"
tools:targetApi="31">
<!-- Single launcher activity; exported="true" is required for the
     MAIN/LAUNCHER intent filter on API 31+. -->
<activity
android:name=".MainActivity"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>
// Module-level Gradle build script (Kotlin DSL) for the demo app.
plugins {
id("com.android.application")
}
android {
namespace = "com.programmerworld.voicebasedimagerecognitionapp"
compileSdk = 34
defaultConfig {
applicationId = "com.programmerworld.voicebasedimagerecognitionapp"
// NOTE(review): minSdk 33 is unusually high and excludes most devices;
// the APIs used here are available well below 33 — confirm this is intended.
minSdk = 33
targetSdk = 34
versionCode = 1
versionName = "1.0"
testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
isMinifyEnabled = false
proguardFiles(
getDefaultProguardFile("proguard-android-optimize.txt"),
"proguard-rules.pro"
)
}
}
compileOptions {
sourceCompatibility = JavaVersion.VERSION_1_8
targetCompatibility = JavaVersion.VERSION_1_8
}
}
dependencies {
implementation("androidx.appcompat:appcompat:1.6.1")
implementation("com.google.android.material:material:1.11.0")
implementation("androidx.constraintlayout:constraintlayout:2.1.4")
testImplementation("junit:junit:4.13.2")
androidTestImplementation("androidx.test.ext:junit:1.1.5")
androidTestImplementation("androidx.test.espresso:espresso-core:3.5.1")
// ML Kit on-device image labeling (bundled model) used by MainActivity.
implementation("com.google.mlkit:image-labeling:17.0.8")
}
<?xml version="1.0" encoding="utf-8"?>
<!-- Main screen: a status TextView, a button that triggers voice recognition,
     and two ImageViews populated at runtime from assets (flower1.jpg and
     butterfly.jpg) in MainActivity.onCreate(). -->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:app="http://schemas.android.com/apk/res-auto"
xmlns:tools="http://schemas.android.com/tools"
android:layout_width="match_parent"
android:layout_height="match_parent"
tools:context=".MainActivity">
<!-- Shows prompts and recognition results. -->
<TextView
android:id="@+id/textView"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:text="Hello World!"
android:textSize="34sp"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<!-- Wired to MainActivity.buttonStartRecognition via android:onClick. -->
<Button
android:id="@+id/button"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_marginStart="112dp"
android:layout_marginTop="16dp"
android:onClick="buttonStartRecognition"
android:text="Start Image Recognition"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<!-- "first" image; srcCompat is a placeholder replaced at runtime. -->
<ImageView
android:id="@+id/imageView1"
android:layout_width="162dp"
android:layout_height="133dp"
android:layout_marginStart="17dp"
android:layout_marginTop="38dp"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toBottomOf="@+id/button"
app:srcCompat="@drawable/ic_launcher_background" />
<!-- "second" image; srcCompat is a placeholder replaced at runtime. -->
<ImageView
android:id="@+id/imageView2"
android:layout_width="148dp"
android:layout_height="133dp"
android:layout_marginStart="50dp"
android:layout_marginTop="36dp"
app:layout_constraintStart_toEndOf="@+id/imageView1"
app:layout_constraintTop_toBottomOf="@+id/button"
app:srcCompat="@drawable/ic_launcher_background" />
</androidx.constraintlayout.widget.ConstraintLayout>
Images used in this demo:
Screenshots:
Project Folder:
The complete project folder can be accessed from the link below on payment of USD 9.
https://drive.google.com/file/d/1WO0x4DEfcptzm4awBhP-aBZuoFa6ga9l/view?usp=drive_link
Excerpt:
This video demonstrates creating a voice-based image recognition Android App using Google’s ML Kit. The app uses speech recognition to label images and provide a spoken response. The tutorial provides step-by-step guidance and full source code. If you are interested in accessing the complete project folder, there is a purchase option for USD 9, with access instructions available upon payment confirmation. Find the video tutorial and more details at the provided links. Contact us for queries or further assistance.