Local Python LLM server and Android App
This video shows the steps and code to create a local LLM server in Python. It then creates a simple Android app that calls the LLM to generate responses for specific prompts.
With this approach, one can create a locally running LLM application that can be used for an unlimited number of prompts for FREE.
Please note that the model runs on the laptop's local CPU, so prefer a smaller LLM that can run comfortably on the limited CPU resources of a home laptop.
I hope you like this video. For any questions, suggestions or appreciation please contact us at: https://programmerworld.co/contact/ or email at: programmerworld1990@gmail.com
Details:
Python Code – for server:
from flask import Flask, request, jsonify
from transformers import pipeline
app = Flask(__name__)

# Load the model ONCE when the server starts so that every request reuses the
# same pipeline instead of loading the model again per call.
try:
    # Other candidate models, kept for reference:
    # generator = pipeline('text-generation', model='EleutherAI/gpt-neo-125M')
    # generator = pipeline('text-generation', model='gpt2-small')
    generator = pipeline('text-generation', model='distilgpt2')
except Exception as e:
    print(f"Error loading model: {e}")
    # The bare exit() helper is installed by the `site` module for interactive
    # use and is not guaranteed to exist; raising SystemExit is the equivalent
    # that always works. Exit because the server is useless without a model.
    raise SystemExit(1)
@app.route('/generate', methods=['POST'])
def generate_text():
    """Generate text for the prompt supplied in the JSON request body.

    Expects a JSON object of the form ``{"prompt": "<text>"}``.

    Returns:
        200 and ``{"result": <generated text>}`` on success.
        400 and ``{"error": ...}`` when the body is not a JSON object or does
        not contain a non-empty string ``prompt``.
        500 and ``{"error": ...}`` when the generation call itself fails.
    """
    # silent=True makes get_json() return None instead of raising when the body
    # is missing or is not valid JSON, so a malformed request is reported as a
    # client error (400) rather than being caught below and returned as a 500.
    data = request.get_json(silent=True)
    prompt = data.get('prompt') if isinstance(data, dict) else None
    if not isinstance(prompt, str) or not prompt:
        return jsonify({'error': 'Prompt is required'}), 400

    try:
        # Generate text (truncation=True is passed along with max_length)
        result = generator(prompt, max_length=100, truncation=True)
        generated_text = result[0]['generated_text']
        return jsonify({'result': generated_text})
    except Exception as e:
        print(f"Error during generation: {e}")  # Log the error for debugging
        return jsonify({'error': 'An error occurred during text generation'}), 500
if __name__ == '__main__':
    # host='0.0.0.0' makes the server listen on all addresses so that another
    # device on the network (the Android app) can reach it.
    # debug stays False because of that exposure: debug mode activates the
    # interactive debugger, which must never be reachable from other machines,
    # and its reloader restarts the process, loading the model a second time.
    app.run(host='0.0.0.0', port=5000, debug=False)
Screenshot:

Output Logs:
(virtualpython) C:\Tools\Python\llama_server>python app.py
Device set to use cpu
* Serving Flask app 'app'
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
Device set to use cpu
* Serving Flask app 'app'
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Serving Flask app 'app'
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Debug mode: on
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
* Running on all addresses (0.0.0.0)
* Running on http://127.0.0.1:5000
* Running on http://192.168.1.7:5000
Press CTRL+C to quit
* Restarting with stat
Device set to use cpu
* Debugger is active!
* Debugger PIN: 144-367-653
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
192.168.1.7 - - [08/Feb/2025 13:13:15] "POST /generate HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
generation.
192.168.1.7 - - [08/Feb/2025 13:17:22] "POST /generate HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
192.168.1.7 - - [08/Feb/2025 13:18:17] "POST /generate HTTP/1.1" 200 -
Android App / Client-side code:
package com.programmerworld.llmfromlocalpythonserver;
// How to create a local AI LLM server in python and call it from your Android App?
import android.os.Bundle;
import android.os.Handler;
import android.os.Looper;
import android.widget.EditText;
import android.widget.TextView;

import androidx.appcompat.app.AppCompatActivity;

import org.json.JSONException;
import org.json.JSONObject;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
public class MainActivity extends AppCompatActivity {
private EditText promptEditText;
private TextView resultTextView;
private ExecutorService executorService = Executors.newFixedThreadPool(4); // Create a thread pool
private Handler mainThreadHandler = new Handler(Looper.getMainLooper()); // Handler for UI updates
@Override
protected void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.activity_main);
promptEditText = findViewById(R.id.prompt_edit_text);
resultTextView = findViewById(R.id.result_text_view);
findViewById(R.id.generate_button).setOnClickListener(v -> {
String prompt = promptEditText.getText().toString();
generateText(prompt);
});
}
private void generateText(String prompt) {
executorService.execute(() -> {
try {
URL url = new URL("http://192.168.1.7:5000/generate"); // Replace with your server URL
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setRequestMethod("POST");
connection.setRequestProperty("Content-Type", "application/json");
JSONObject json = new JSONObject();
json.put("prompt", prompt);
OutputStream os = connection.getOutputStream();
os.write(json.toString().getBytes());
os.flush();
os.close();
BufferedReader reader = new BufferedReader(new InputStreamReader(connection.getInputStream()));
String line;
StringBuilder response = new StringBuilder();
while ((line = reader.readLine()) != null) {
response.append(line);
}
reader.close();
connection.disconnect();
JSONObject resultJson = new JSONObject(response.toString());
String result = resultJson.getString("result");
// Update UI on the main thread
mainThreadHandler.post(() -> {
resultTextView.setText(result);
});
} catch (IOException | JSONException e) {
e.printStackTrace();
String error = "Error: " + e.getMessage();
mainThreadHandler.post(() -> {
resultTextView.setText(error);
});
}
});
}
@Override
protected void onDestroy() {
super.onDestroy();
executorService.shutdown(); // Important: Shut down the executor when the activity is destroyed
}
}
<?xml version="1.0" encoding="utf-8"?>
<!-- Manifest of the Android client app: declares the network permission and the single launcher activity. -->
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<!-- Needed for the HTTP request that MainActivity sends to the LLM server. -->
<uses-permission android:name="android.permission.INTERNET" />
<!-- usesCleartextTraffic="true" permits unencrypted http:// connections; the server URL used by MainActivity is plain http. -->
<!-- NOTE(review): this applies to every host. Consider a network security config limited to the server address - confirm before release. -->
<application
android:usesCleartextTraffic="true"
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"
android:fullBackupContent="@xml/backup_rules"
android:icon="@mipmap/ic_launcher"
android:label="@string/app_name"
android:roundIcon="@mipmap/ic_launcher_round"
android:supportsRtl="true"
android:theme="@style/Theme.LLMFromLocalPythonServer"
tools:targetApi="31">
<!-- MainActivity is the launcher entry point (MAIN action + LAUNCHER category), so it is exported. -->
<activity
android:name=".MainActivity"
android:exported="true">
<intent-filter>
<action android:name="android.intent.action.MAIN" />
<category android:name="android.intent.category.LAUNCHER" />
</intent-filter>
</activity>
</application>
</manifest>
<?xml version="1.0" encoding="utf-8"?>
<!-- Layout for MainActivity: prompt input at the top, a Generate button below it, then the response text. -->
<androidx.constraintlayout.widget.ConstraintLayout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools"
    android:id="@+id/main"
    android:layout_width="match_parent"
    android:layout_height="match_parent"
    tools:context=".MainActivity">

    <!-- Shows the generated text or an error message; positioned below the Generate button. -->
    <TextView
        android:id="@+id/result_text_view"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginTop="84dp"
        android:layout_marginBottom="428dp"
        android:text="Output Response ..."
        android:textSize="20sp"
        app:layout_constraintBottom_toBottomOf="parent"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintHorizontal_bias="0.5"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toBottomOf="@+id/generate_button" />

    <!-- Sends the current prompt to the server (click listener is set in MainActivity). -->
    <Button
        android:id="@+id/generate_button"
        android:layout_width="wrap_content"
        android:layout_height="wrap_content"
        android:layout_marginTop="144dp"
        android:text="Generate"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintHorizontal_bias="0.5"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toTopOf="parent" />

    <!-- Prompt input. tools:text is a design-time preview only and is not shown at runtime. -->
    <EditText
        android:id="@+id/prompt_edit_text"
        android:layout_width="290dp"
        android:layout_height="84dp"
        android:layout_marginTop="36dp"
        android:ems="10"
        android:inputType="text"
        app:layout_constraintEnd_toEndOf="parent"
        app:layout_constraintHorizontal_bias="0.5"
        app:layout_constraintStart_toStartOf="parent"
        app:layout_constraintTop_toTopOf="parent"
        tools:text="Input String ..." />

</androidx.constraintlayout.widget.ConstraintLayout>
Screenshots:


Postman Output:


curl command:
curl --location 'http://192.168.1.7:5000/generate' \
--header 'Content-Type: application/json' \
--data '{
"prompt": "Write a poem on sun"
}'
Response:
{
"result": "Write a poem on sun.\nTo begin, begin:\n*\n*\nIt is said thus:\n*\n*\nThis poem has a long, difficult time.\nThere is a poem of a thousand words, each of them of which, in its place, it would become a proverb.\nThere is a poem of the hundred words that I shall write after them.\nThis poem is that my heart-strings, in which I had two different words:\n*\nBut"
}
Download the complete project files:
https://drive.google.com/drive/folders/18cKWHBYGi7PMEx6-Qs9sEWMFMVE_-I4C?usp=sharing