From 0fef18eb0670e4747c955bbae4cbd21477d90c62 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Sat, 6 Dec 2025 22:46:45 -0800 Subject: [PATCH] Initial commit: Native Android voice transcription app MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Features: - On-device Whisper transcription via sherpa-onnx - Kotlin + Jetpack Compose UI - Multiple trigger methods: - Floating button overlay - Volume button combo (Accessibility Service) - Quick Settings tile - Smart action routing: - Copy to clipboard - Share via apps - Save as markdown note - Create task (Backlog.md compatible) - Intent detection for suggested actions Requires Android 10+ (API 29) 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- .gitignore | 29 ++ README.md | 192 +++++++++++ app/build.gradle.kts | 84 +++++ app/proguard-rules.pro | 21 ++ app/src/main/AndroidManifest.xml | 109 ++++++ .../jeffemmett/voicecommand/MainActivity.kt | 319 ++++++++++++++++++ .../voicecommand/VoiceCommandApp.kt | 62 ++++ .../voicecommand/action/ActionRouter.kt | 245 ++++++++++++++ .../voicecommand/audio/AudioRecorder.kt | 223 ++++++++++++ .../voicecommand/service/BootReceiver.kt | 33 ++ .../service/FloatingButtonService.kt | 206 +++++++++++ .../service/VoiceCommandTileService.kt | 124 +++++++ .../VolumeButtonAccessibilityService.kt | 175 ++++++++++ .../stt/SherpaTranscriptionEngine.kt | 270 +++++++++++++++ .../voicecommand/ui/RecordingScreen.kt | 301 +++++++++++++++++ .../ui/TranscriptionResultActivity.kt | 221 ++++++++++++ .../jeffemmett/voicecommand/ui/theme/Theme.kt | 70 ++++ app/src/main/res/drawable/ic_mic.xml | 11 + app/src/main/res/drawable/ic_stop.xml | 11 + app/src/main/res/values/strings.xml | 39 +++ app/src/main/res/values/themes.xml | 14 + .../res/xml/accessibility_service_config.xml | 10 + backlog/config.yml | 13 + ...001 - Download-and-bundle-Whisper-model.md | 27 ++ .../task-002 - Build-and-test-debug-APK.md | 36 ++ build.gradle.kts | 6 + download-models.sh | 37 ++ gradle/libs.versions.toml | 31 ++ gradle/wrapper/gradle-wrapper.properties | 7 + settings.gradle.kts | 19 ++ 30 files changed, 2945 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 app/build.gradle.kts create mode 100644 app/proguard-rules.pro create mode 100644 app/src/main/AndroidManifest.xml create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/MainActivity.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/VoiceCommandApp.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/action/ActionRouter.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/audio/AudioRecorder.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/service/BootReceiver.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/service/FloatingButtonService.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/service/VoiceCommandTileService.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/service/VolumeButtonAccessibilityService.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/stt/SherpaTranscriptionEngine.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/ui/RecordingScreen.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/ui/TranscriptionResultActivity.kt create mode 100644 app/src/main/java/com/jeffemmett/voicecommand/ui/theme/Theme.kt create mode 100644 app/src/main/res/drawable/ic_mic.xml create mode 
100644 app/src/main/res/drawable/ic_stop.xml create mode 100644 app/src/main/res/values/strings.xml create mode 100644 app/src/main/res/values/themes.xml create mode 100644 app/src/main/res/xml/accessibility_service_config.xml create mode 100644 backlog/config.yml create mode 100644 backlog/tasks/task-001 - Download-and-bundle-Whisper-model.md create mode 100644 backlog/tasks/task-002 - Build-and-test-debug-APK.md create mode 100644 build.gradle.kts create mode 100755 download-models.sh create mode 100644 gradle/libs.versions.toml create mode 100644 gradle/wrapper/gradle-wrapper.properties create mode 100644 settings.gradle.kts diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..699f9a8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +# Android/Gradle +*.iml +.gradle/ +local.properties +.idea/ +*.hprof +build/ +captures/ +.externalNativeBuild/ +.cxx/ +*.apk +*.aab +*.ap_ +*.dex + +# Kotlin +*.class + +# OS +.DS_Store +Thumbs.db + +# Models (downloaded separately) +app/src/main/assets/models/*.onnx + +# Signing +*.jks +*.keystore +keystore.properties diff --git a/README.md b/README.md new file mode 100644 index 0000000..6afd198 --- /dev/null +++ b/README.md @@ -0,0 +1,192 @@ +# Voice Command - Native Android App + +A fully integrated Android app for voice-to-text transcription with on-device Whisper processing. No server required, no Termux, no additional apps needed. + +## Features + +- **100% On-Device Transcription** - Uses sherpa-onnx with Whisper models +- **Privacy-First** - All processing happens locally, no data leaves your device +- **Multiple Trigger Methods**: + - Floating button overlay (always accessible) + - Volume button combo (press both volumes) + - Quick Settings tile (notification shade) +- **Smart Routing**: + - Copy to clipboard + - Share via any app + - Save as markdown note + - Create task (Backlog.md compatible) +- **Intent Detection** - Automatically suggests best action based on content + +## Requirements + +- Android 10 (API 29) or higher +- ~100-250MB storage for Whisper model +- Microphone permission + +## Installation + +### From APK (Recommended) + +1. Download the latest APK from releases +2. Enable "Install from unknown sources" if prompted +3. Install and open Voice Command +4. Grant microphone permission +5. Wait for model download (~40-250MB depending on selected model) + +### Build from Source + +```bash +# Clone the repository +git clone https://gitea.jeffemmett.com/jeffemmett/voice-command.git +cd voice-command/android-native + +# Build debug APK +./gradlew assembleDebug + +# Build release APK (requires signing config) +./gradlew assembleRelease +``` + +The APK will be in `app/build/outputs/apk/` + +## Usage + +### Quick Start + +1. **Open the app** and grant microphone permission +2. **Tap the big mic button** to start recording +3. **Speak your note or task** +4. **Tap again to stop** - transcription happens automatically +5. 
**Choose an action** from the menu + +### Trigger Methods + +#### Floating Button +- Enable in Settings +- Drag to reposition +- Tap to start/stop recording +- Works over any app + +#### Volume Buttons +- Enable Accessibility Service in Settings +- Press Volume Up + Volume Down simultaneously +- Vibration confirms recording start/stop + +#### Quick Settings Tile +- Swipe down notification shade +- Add "Voice Note" tile +- Tap tile to toggle recording + +## Models + +| Model | Size | Languages | Quality | +|-------|------|-----------|---------| +| Tiny English | ~40MB | English only | Good for quick notes | +| Base English | ~75MB | English only | Better accuracy | +| Small English | ~250MB | English only | Best accuracy | +| Tiny | ~40MB | Multilingual | Basic quality | +| Base | ~75MB | Multilingual | Good quality | +| Small | ~250MB | Multilingual | Best quality | + +## Architecture + +``` +┌─────────────────────────────────────────────────────┐ +│ Voice Command App │ +├─────────────────────────────────────────────────────┤ +│ UI Layer (Jetpack Compose) │ +│ ├── MainActivity (main interface) │ +│ ├── RecordingScreen (recording controls) │ +│ └── TranscriptionResultActivity (result dialog) │ +├─────────────────────────────────────────────────────┤ +│ Service Layer │ +│ ├── FloatingButtonService (overlay) │ +│ ├── VolumeButtonAccessibilityService (vol combo) │ +│ └── VoiceCommandTileService (Quick Settings) │ +├─────────────────────────────────────────────────────┤ +│ Core Layer │ +│ ├── AudioRecorder (16kHz PCM capture) │ +│ ├── SherpaTranscriptionEngine (Whisper wrapper) │ +│ └── ActionRouter (clipboard, files, share) │ +├─────────────────────────────────────────────────────┤ +│ Native Layer (sherpa-onnx) │ +│ └── Whisper ONNX models + ONNX Runtime │ +└─────────────────────────────────────────────────────┘ +``` + +## Permissions + +| Permission | Purpose | +|------------|---------| +| `RECORD_AUDIO` | Voice recording | +| `SYSTEM_ALERT_WINDOW` | Floating button overlay | +| `FOREGROUND_SERVICE` | Background recording | +| `POST_NOTIFICATIONS` | Service notifications | +| `VIBRATE` | Recording feedback | + +## Output Formats + +### Notes (Markdown) +```markdown +# Voice Note Title + +Your transcribed text here... + +--- +Created: 2025-12-06 14:30 +Source: voice +``` + +### Tasks (Backlog.md Compatible) +```markdown +--- +title: Task Title +status: To Do +priority: medium +created: 2025-12-06T14:30:00 +source: voice +--- + +# Task Title + +Your transcribed text here... 
+``` + +## Troubleshooting + +### Model won't load +- Ensure sufficient storage (~250MB free) +- Check internet connection for initial download +- Try a smaller model (Tiny instead of Small) + +### Recording not working +- Check microphone permission is granted +- Ensure no other app is using microphone +- Try restarting the app + +### Volume buttons not detected +- Enable Accessibility Service in Android Settings +- Grant all requested permissions +- Some custom ROMs may block this feature + +### Floating button not appearing +- Enable "Display over other apps" permission +- Check notification for "Floating Button Active" +- Some launchers may hide overlays + +## Privacy + +- **All transcription happens on-device** +- No audio or text is sent to any server +- No analytics or tracking +- Notes/tasks saved only to local storage + +## Credits + +- [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) - On-device speech recognition +- [OpenAI Whisper](https://openai.com/research/whisper) - Original Whisper model +- [Jetpack Compose](https://developer.android.com/compose) - Modern Android UI + +## License + +MIT diff --git a/app/build.gradle.kts b/app/build.gradle.kts new file mode 100644 index 0000000..0ab3941 --- /dev/null +++ b/app/build.gradle.kts @@ -0,0 +1,84 @@ +plugins { + alias(libs.plugins.android.application) + alias(libs.plugins.kotlin.android) + alias(libs.plugins.kotlin.compose) +} + +android { + namespace = "com.jeffemmett.voicecommand" + compileSdk = 35 + + defaultConfig { + applicationId = "com.jeffemmett.voicecommand" + minSdk = 29 // Android 10 + targetSdk = 35 + versionCode = 1 + versionName = "1.0.0" + + testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner" + + // Enable native libs extraction for sherpa-onnx + ndk { + abiFilters += listOf("arm64-v8a", "armeabi-v7a", "x86_64") + } + } + + buildTypes { + release { + isMinifyEnabled = true + isShrinkResources = true + proguardFiles( + getDefaultProguardFile("proguard-android-optimize.txt"), + "proguard-rules.pro" + ) + } + debug { + isMinifyEnabled = false + } + } + + compileOptions { + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 + } + + kotlinOptions { + jvmTarget = "17" + } + + buildFeatures { + compose = true + } + + packaging { + resources { + excludes += "/META-INF/{AL2.0,LGPL2.1}" + } + // Don't compress model files + jniLibs { + useLegacyPackaging = true + } + } +} + +dependencies { + implementation(libs.androidx.core.ktx) + implementation(libs.androidx.lifecycle.runtime.ktx) + implementation(libs.androidx.lifecycle.viewmodel.compose) + implementation(libs.androidx.activity.compose) + implementation(libs.androidx.datastore.preferences) + implementation(libs.kotlinx.coroutines.android) + + // Compose + implementation(platform(libs.androidx.compose.bom)) + implementation(libs.androidx.ui) + implementation(libs.androidx.ui.graphics) + implementation(libs.androidx.ui.tooling.preview) + implementation(libs.androidx.material3) + implementation(libs.androidx.material.icons.extended) + + // Sherpa-ONNX for on-device speech recognition + implementation(libs.sherpa.onnx) + + debugImplementation(libs.androidx.ui.tooling) +} diff --git a/app/proguard-rules.pro b/app/proguard-rules.pro new file mode 100644 index 0000000..e8ef4c0 --- /dev/null +++ b/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Voice Command ProGuard Rules + +# Keep sherpa-onnx native methods +-keep class com.k2fsa.sherpa.onnx.** { *; } +-keepclassmembers class com.k2fsa.sherpa.onnx.** { *; } + +# Keep 
native methods +-keepclasseswithmembernames class * { + native ; +} + +# Keep Kotlin metadata for reflection +-keepattributes *Annotation* +-keepattributes RuntimeVisibleAnnotations + +# Keep coroutines +-keepnames class kotlinx.coroutines.** { *; } + +# Keep Compose +-keep class androidx.compose.** { *; } +-keepclassmembers class androidx.compose.** { *; } diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000..a41343f --- /dev/null +++ b/app/src/main/AndroidManifest.xml @@ -0,0 +1,109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/app/src/main/java/com/jeffemmett/voicecommand/MainActivity.kt b/app/src/main/java/com/jeffemmett/voicecommand/MainActivity.kt new file mode 100644 index 0000000..58776a7 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/MainActivity.kt @@ -0,0 +1,319 @@ +package com.jeffemmett.voicecommand + +import android.Manifest +import android.content.Intent +import android.net.Uri +import android.os.Bundle +import android.provider.Settings +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.activity.enableEdgeToEdge +import androidx.activity.result.contract.ActivityResultContracts +import androidx.compose.foundation.layout.* +import androidx.compose.foundation.rememberScrollState +import androidx.compose.foundation.verticalScroll +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.* +import androidx.compose.material3.* +import androidx.compose.runtime.* +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.text.style.TextAlign +import androidx.compose.ui.unit.dp +import androidx.lifecycle.compose.collectAsStateWithLifecycle +import com.jeffemmett.voicecommand.audio.AudioRecorder +import com.jeffemmett.voicecommand.service.FloatingButtonService +import com.jeffemmett.voicecommand.stt.SherpaTranscriptionEngine +import com.jeffemmett.voicecommand.ui.RecordingScreen +import com.jeffemmett.voicecommand.ui.theme.VoiceCommandTheme +import kotlinx.coroutines.launch + +class MainActivity : ComponentActivity() { + + private val requestPermissionLauncher = registerForActivityResult( + ActivityResultContracts.RequestPermission() + ) { isGranted -> + // Permission result handled in compose state + } + + override fun onCreate(savedInstanceState: Bundle?) 
{ + super.onCreate(savedInstanceState) + enableEdgeToEdge() + + setContent { + VoiceCommandTheme { + Surface( + modifier = Modifier.fillMaxSize(), + color = MaterialTheme.colorScheme.background + ) { + MainScreen( + onRequestMicPermission = { + requestPermissionLauncher.launch(Manifest.permission.RECORD_AUDIO) + }, + onRequestOverlayPermission = { + val intent = Intent( + Settings.ACTION_MANAGE_OVERLAY_PERMISSION, + Uri.parse("package:$packageName") + ) + startActivity(intent) + }, + onOpenAccessibilitySettings = { + startActivity(Intent(Settings.ACTION_ACCESSIBILITY_SETTINGS)) + } + ) + } + } + } + } +} + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun MainScreen( + onRequestMicPermission: () -> Unit, + onRequestOverlayPermission: () -> Unit, + onOpenAccessibilitySettings: () -> Unit +) { + val context = LocalContext.current + val app = VoiceCommandApp.getInstance() + val scope = rememberCoroutineScope() + + val audioRecorder = remember { AudioRecorder(context) } + val recordingState by audioRecorder.state.collectAsStateWithLifecycle() + val engineState by app.transcriptionEngine.state.collectAsStateWithLifecycle() + + var showSettings by remember { mutableStateOf(false) } + var floatingButtonEnabled by remember { mutableStateOf(false) } + + // Check permissions + val hasMicPermission = audioRecorder.hasPermission() + val hasOverlayPermission = Settings.canDrawOverlays(context) + + // Initialize engine on first launch + LaunchedEffect(Unit) { + if (engineState is SherpaTranscriptionEngine.EngineState.NotInitialized) { + app.transcriptionEngine.initialize() + } + } + + Scaffold( + topBar = { + TopAppBar( + title = { Text("Voice Command") }, + actions = { + IconButton(onClick = { showSettings = !showSettings }) { + Icon(Icons.Default.Settings, contentDescription = "Settings") + } + } + ) + } + ) { paddingValues -> + Column( + modifier = Modifier + .fillMaxSize() + .padding(paddingValues) + .verticalScroll(rememberScrollState()) + .padding(16.dp), + horizontalAlignment = Alignment.CenterHorizontally + ) { + // Status Card + StatusCard( + engineState = engineState, + hasMicPermission = hasMicPermission, + hasOverlayPermission = hasOverlayPermission, + onRequestMicPermission = onRequestMicPermission, + onRequestOverlayPermission = onRequestOverlayPermission + ) + + Spacer(modifier = Modifier.height(24.dp)) + + // Recording Section + if (hasMicPermission && app.transcriptionEngine.isReady()) { + RecordingScreen( + audioRecorder = audioRecorder, + transcriptionEngine = app.transcriptionEngine + ) + } + + Spacer(modifier = Modifier.height(24.dp)) + + // Trigger Options + TriggerOptionsCard( + floatingButtonEnabled = floatingButtonEnabled, + onFloatingButtonToggle = { enabled -> + floatingButtonEnabled = enabled + if (enabled && hasOverlayPermission) { + context.startService(Intent(context, FloatingButtonService::class.java)) + } else { + context.stopService(Intent(context, FloatingButtonService::class.java)) + } + }, + hasOverlayPermission = hasOverlayPermission, + onRequestOverlayPermission = onRequestOverlayPermission, + onOpenAccessibilitySettings = onOpenAccessibilitySettings + ) + } + } +} + +@Composable +fun StatusCard( + engineState: SherpaTranscriptionEngine.EngineState, + hasMicPermission: Boolean, + hasOverlayPermission: Boolean, + onRequestMicPermission: () -> Unit, + onRequestOverlayPermission: () -> Unit +) { + Card( + modifier = Modifier.fillMaxWidth(), + colors = CardDefaults.cardColors( + containerColor = when (engineState) { + is 
SherpaTranscriptionEngine.EngineState.Ready -> MaterialTheme.colorScheme.primaryContainer + is SherpaTranscriptionEngine.EngineState.Error -> MaterialTheme.colorScheme.errorContainer + else -> MaterialTheme.colorScheme.surfaceVariant + } + ) + ) { + Column( + modifier = Modifier.padding(16.dp), + horizontalAlignment = Alignment.CenterHorizontally + ) { + // Engine status + Row( + verticalAlignment = Alignment.CenterVertically, + horizontalArrangement = Arrangement.Center + ) { + when (engineState) { + is SherpaTranscriptionEngine.EngineState.Ready -> { + Icon(Icons.Default.CheckCircle, "Ready", tint = MaterialTheme.colorScheme.primary) + Spacer(Modifier.width(8.dp)) + Text("Transcription Engine Ready") + } + is SherpaTranscriptionEngine.EngineState.Initializing -> { + CircularProgressIndicator(modifier = Modifier.size(20.dp)) + Spacer(Modifier.width(8.dp)) + Text("Loading model...") + } + is SherpaTranscriptionEngine.EngineState.Downloading -> { + CircularProgressIndicator( + progress = { engineState.progress }, + modifier = Modifier.size(20.dp) + ) + Spacer(Modifier.width(8.dp)) + Text("Downloading ${engineState.modelName}...") + } + is SherpaTranscriptionEngine.EngineState.Error -> { + Icon(Icons.Default.Error, "Error", tint = MaterialTheme.colorScheme.error) + Spacer(Modifier.width(8.dp)) + Text(engineState.message, color = MaterialTheme.colorScheme.error) + } + is SherpaTranscriptionEngine.EngineState.NotInitialized -> { + Icon(Icons.Default.HourglassEmpty, "Waiting") + Spacer(Modifier.width(8.dp)) + Text("Initializing...") + } + } + } + + // Permission warnings + if (!hasMicPermission) { + Spacer(Modifier.height(12.dp)) + OutlinedButton(onClick = onRequestMicPermission) { + Icon(Icons.Default.Mic, null) + Spacer(Modifier.width(8.dp)) + Text("Grant Microphone Permission") + } + } + } + } +} + +@Composable +fun TriggerOptionsCard( + floatingButtonEnabled: Boolean, + onFloatingButtonToggle: (Boolean) -> Unit, + hasOverlayPermission: Boolean, + onRequestOverlayPermission: () -> Unit, + onOpenAccessibilitySettings: () -> Unit +) { + Card(modifier = Modifier.fillMaxWidth()) { + Column(modifier = Modifier.padding(16.dp)) { + Text( + "Recording Triggers", + style = MaterialTheme.typography.titleMedium, + modifier = Modifier.padding(bottom = 12.dp) + ) + + // Floating Button + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Floating Button") + Text( + "Show always-visible mic button", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + } + if (hasOverlayPermission) { + Switch( + checked = floatingButtonEnabled, + onCheckedChange = onFloatingButtonToggle + ) + } else { + TextButton(onClick = onRequestOverlayPermission) { + Text("Enable") + } + } + } + + HorizontalDivider(modifier = Modifier.padding(vertical = 12.dp)) + + // Volume Buttons + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Volume Button Trigger") + Text( + "Press both volume buttons to record", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + } + TextButton(onClick = onOpenAccessibilitySettings) { + Text("Setup") + } + } + + HorizontalDivider(modifier = Modifier.padding(vertical = 12.dp)) + + // Quick Settings Tile + Row( 
+ modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically + ) { + Column(modifier = Modifier.weight(1f)) { + Text("Quick Settings Tile") + Text( + "Add tile to notification shade", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + } + Icon( + Icons.Default.CheckCircle, + "Available", + tint = MaterialTheme.colorScheme.primary + ) + } + } + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/VoiceCommandApp.kt b/app/src/main/java/com/jeffemmett/voicecommand/VoiceCommandApp.kt new file mode 100644 index 0000000..6cd71fd --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/VoiceCommandApp.kt @@ -0,0 +1,62 @@ +package com.jeffemmett.voicecommand + +import android.app.Application +import android.app.NotificationChannel +import android.app.NotificationManager +import android.content.Context +import com.jeffemmett.voicecommand.stt.SherpaTranscriptionEngine + +class VoiceCommandApp : Application() { + + lateinit var transcriptionEngine: SherpaTranscriptionEngine + private set + + override fun onCreate() { + super.onCreate() + instance = this + + createNotificationChannels() + initializeTranscriptionEngine() + } + + private fun createNotificationChannels() { + val notificationManager = getSystemService(NotificationManager::class.java) + + // Recording channel + val recordingChannel = NotificationChannel( + CHANNEL_RECORDING, + getString(R.string.notification_channel_recording), + NotificationManager.IMPORTANCE_LOW + ).apply { + description = "Shows when voice recording is active" + setShowBadge(false) + } + + // Overlay channel + val overlayChannel = NotificationChannel( + CHANNEL_OVERLAY, + getString(R.string.notification_channel_overlay), + NotificationManager.IMPORTANCE_MIN + ).apply { + description = "Floating button service notification" + setShowBadge(false) + } + + notificationManager.createNotificationChannels(listOf(recordingChannel, overlayChannel)) + } + + private fun initializeTranscriptionEngine() { + transcriptionEngine = SherpaTranscriptionEngine(this) + } + + companion object { + const val CHANNEL_RECORDING = "voice_recording" + const val CHANNEL_OVERLAY = "floating_overlay" + + private lateinit var instance: VoiceCommandApp + + fun getInstance(): VoiceCommandApp = instance + + fun getAppContext(): Context = instance.applicationContext + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/action/ActionRouter.kt b/app/src/main/java/com/jeffemmett/voicecommand/action/ActionRouter.kt new file mode 100644 index 0000000..82d7ef8 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/action/ActionRouter.kt @@ -0,0 +1,245 @@ +package com.jeffemmett.voicecommand.action + +import android.content.ClipData +import android.content.ClipboardManager +import android.content.Context +import android.content.Intent +import android.os.Environment +import android.util.Log +import android.widget.Toast +import java.io.File +import java.text.SimpleDateFormat +import java.util.Date +import java.util.Locale + +/** + * Routes transcription results to various destinations. 
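+ *
+ * Illustrative usage from a caller (hypothetical: `router`, `context`, and `transcribedText`
+ * are assumptions, not part of this commit):
+ * ```
+ * val router = ActionRouter(context)
+ * val analysis = router.analyzeIntent(transcribedText)
+ * when (analysis.suggestedAction) {
+ *     ActionRouter.Action.CreateTask -> router.createTask(analysis.cleanedText, analysis.title, analysis.priority)
+ *     ActionRouter.Action.SaveNote -> router.saveAsNote(analysis.cleanedText, analysis.title)
+ *     else -> router.copyToClipboard(analysis.cleanedText)
+ * }
+ * ```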
+ */ +class ActionRouter(private val context: Context) { + + companion object { + private const val TAG = "ActionRouter" + } + + sealed class Action(val displayName: String, val icon: String) { + data object Copy : Action("Copy to Clipboard", "content_copy") + data object Share : Action("Share", "share") + data object SaveNote : Action("Save as Note", "note_add") + data object CreateTask : Action("Create Task", "task_alt") + data object Dismiss : Action("Dismiss", "close") + } + + /** + * Copy text to clipboard. + */ + fun copyToClipboard(text: String): Boolean { + return try { + val clipboard = context.getSystemService(Context.CLIPBOARD_SERVICE) as ClipboardManager + val clip = ClipData.newPlainText("Voice Transcription", text) + clipboard.setPrimaryClip(clip) + showToast("Copied to clipboard") + true + } catch (e: Exception) { + Log.e(TAG, "Failed to copy to clipboard", e) + false + } + } + + /** + * Share text via Android share sheet. + */ + fun share(text: String, title: String = "Voice Note") { + val sendIntent = Intent().apply { + action = Intent.ACTION_SEND + putExtra(Intent.EXTRA_TEXT, text) + putExtra(Intent.EXTRA_SUBJECT, title) + type = "text/plain" + } + + val shareIntent = Intent.createChooser(sendIntent, "Share voice note") + shareIntent.addFlags(Intent.FLAG_ACTIVITY_NEW_TASK) + context.startActivity(shareIntent) + } + + /** + * Save transcription as a markdown note. + */ + fun saveAsNote(text: String, title: String? = null): File? { + return try { + val notesDir = getNotesDirectory() + notesDir.mkdirs() + + val timestamp = SimpleDateFormat("yyyy-MM-dd-HHmmss", Locale.US).format(Date()) + val noteTitle = title ?: extractTitle(text) + val safeTitle = noteTitle.take(40).replace(Regex("[^a-zA-Z0-9 -]"), "") + val filename = "$timestamp-$safeTitle.md" + + val file = File(notesDir, filename) + val content = buildNoteContent(text, noteTitle) + file.writeText(content) + + showToast("Saved: $filename") + Log.i(TAG, "Note saved: ${file.absolutePath}") + file + + } catch (e: Exception) { + Log.e(TAG, "Failed to save note", e) + showToast("Failed to save note") + null + } + } + + /** + * Create a task file (compatible with Backlog.md format). + */ + fun createTask(text: String, title: String? = null, priority: String = "medium"): File? { + return try { + val tasksDir = getTasksDirectory() + tasksDir.mkdirs() + + val timestamp = SimpleDateFormat("yyyy-MM-dd-HHmmss", Locale.US).format(Date()) + val taskTitle = title ?: extractTitle(text) + val safeTitle = taskTitle.take(40).replace(Regex("[^a-zA-Z0-9 -]"), "") + val filename = "$timestamp-$safeTitle.md" + + val file = File(tasksDir, filename) + val content = buildTaskContent(text, taskTitle, priority) + file.writeText(content) + + showToast("Task created: $taskTitle") + Log.i(TAG, "Task saved: ${file.absolutePath}") + file + + } catch (e: Exception) { + Log.e(TAG, "Failed to create task", e) + showToast("Failed to create task") + null + } + } + + /** + * Analyze text to determine best routing. 
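+     *
+     * The classification is purely keyword-based (no ML). Illustrative example: "remind me to
+     * send the report asap" matches a task keyword ("remind me") and an urgency keyword
+     * ("asap"), so it should yield Intent.TASK, priority "high", and suggestedAction =
+     * Action.CreateTask; text matching none of the keyword lists falls through to
+     * Intent.NOTE with Action.SaveNote.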
+ */ + fun analyzeIntent(text: String): AnalysisResult { + val textLower = text.lowercase() + + // Detect intent from keywords + val intent = when { + textLower.containsAny("task", "todo", "need to", "should", "must", "remind me") -> Intent.TASK + textLower.containsAny("?", "how", "what", "why", "when", "where", "can you", "help me") -> Intent.QUESTION + textLower.containsAny("idea", "thought", "maybe", "what if", "consider") -> Intent.IDEA + else -> Intent.NOTE + } + + // Extract title + val title = extractTitle(text) + + // Determine priority + val priority = when { + textLower.containsAny("urgent", "asap", "immediately", "critical") -> "high" + textLower.containsAny("when you get a chance", "eventually", "sometime") -> "low" + else -> "medium" + } + + // Suggest action + val suggestedAction = when (intent) { + Intent.TASK -> Action.CreateTask + Intent.QUESTION -> Action.Share + Intent.IDEA, Intent.NOTE -> Action.SaveNote + } + + return AnalysisResult( + intent = intent, + title = title, + cleanedText = text.trim(), + priority = priority, + suggestedAction = suggestedAction + ) + } + + private fun extractTitle(text: String): String { + // Get first sentence or first 60 chars + val firstSentence = text.split(Regex("[.?!]")).firstOrNull()?.trim() ?: text + val title = firstSentence.take(60) + + // Clean up common voice prefixes + val prefixes = listOf( + "create a task to ", + "task to ", + "add task ", + "note ", + "remind me to ", + "please " + ) + + var cleanTitle = title.lowercase() + for (prefix in prefixes) { + if (cleanTitle.startsWith(prefix)) { + cleanTitle = title.substring(prefix.length) + break + } + } + + return cleanTitle.trim().replaceFirstChar { it.uppercaseChar() }.ifEmpty { "Voice Note" } + } + + private fun buildNoteContent(text: String, title: String): String { + val timestamp = SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.US).format(Date()) + return """ + |# $title + | + |$text + | + |--- + |Created: $timestamp + |Source: voice + """.trimMargin() + } + + private fun buildTaskContent(text: String, title: String, priority: String): String { + val timestamp = SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss", Locale.US).format(Date()) + return """ + |--- + |title: $title + |status: To Do + |priority: $priority + |created: $timestamp + |source: voice + |--- + | + |# $title + | + |$text + """.trimMargin() + } + + private fun getNotesDirectory(): File { + // Try external storage first, fall back to app files + val externalDir = context.getExternalFilesDir(Environment.DIRECTORY_DOCUMENTS) + return File(externalDir ?: context.filesDir, "VoiceNotes") + } + + private fun getTasksDirectory(): File { + val externalDir = context.getExternalFilesDir(Environment.DIRECTORY_DOCUMENTS) + return File(externalDir ?: context.filesDir, "VoiceTasks") + } + + private fun showToast(message: String) { + Toast.makeText(context, message, Toast.LENGTH_SHORT).show() + } + + private fun String.containsAny(vararg keywords: String): Boolean = + keywords.any { this.contains(it) } + + enum class Intent { + TASK, NOTE, QUESTION, IDEA + } + + data class AnalysisResult( + val intent: Intent, + val title: String, + val cleanedText: String, + val priority: String, + val suggestedAction: Action + ) +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/audio/AudioRecorder.kt b/app/src/main/java/com/jeffemmett/voicecommand/audio/AudioRecorder.kt new file mode 100644 index 0000000..0d8e161 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/audio/AudioRecorder.kt @@ -0,0 +1,223 @@ +package 
com.jeffemmett.voicecommand.audio + +import android.Manifest +import android.content.Context +import android.content.pm.PackageManager +import android.media.AudioFormat +import android.media.AudioRecord +import android.media.MediaRecorder +import android.util.Log +import androidx.core.content.ContextCompat +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.Flow +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.flow.flow +import kotlinx.coroutines.flow.flowOn +import kotlinx.coroutines.isActive +import kotlinx.coroutines.withContext +import java.io.File +import java.io.FileOutputStream +import java.nio.ByteBuffer +import java.nio.ByteOrder +import kotlin.coroutines.coroutineContext + +/** + * Audio recorder that captures audio for transcription. + * Uses AudioRecord for raw PCM data that can be fed directly to Whisper. + */ +class AudioRecorder(private val context: Context) { + + companion object { + private const val TAG = "AudioRecorder" + + // Whisper expects 16kHz mono audio + const val SAMPLE_RATE = 16000 + const val CHANNEL_CONFIG = AudioFormat.CHANNEL_IN_MONO + const val AUDIO_FORMAT = AudioFormat.ENCODING_PCM_16BIT + + val BUFFER_SIZE: Int = AudioRecord.getMinBufferSize( + SAMPLE_RATE, + CHANNEL_CONFIG, + AUDIO_FORMAT + ).coerceAtLeast(SAMPLE_RATE * 2) // At least 1 second buffer + } + + sealed class RecordingState { + data object Idle : RecordingState() + data object Recording : RecordingState() + data object Processing : RecordingState() + data class Error(val message: String) : RecordingState() + } + + private val _state = MutableStateFlow(RecordingState.Idle) + val state: StateFlow = _state.asStateFlow() + + private var audioRecord: AudioRecord? = null + private val audioBuffer = mutableListOf() + + fun hasPermission(): Boolean { + return ContextCompat.checkSelfPermission( + context, + Manifest.permission.RECORD_AUDIO + ) == PackageManager.PERMISSION_GRANTED + } + + /** + * Start recording audio. Returns a Flow of audio samples for real-time processing. + */ + suspend fun startRecording(): Flow = flow { + if (!hasPermission()) { + _state.value = RecordingState.Error("Microphone permission not granted") + return@flow + } + + try { + audioRecord = AudioRecord( + MediaRecorder.AudioSource.MIC, + SAMPLE_RATE, + CHANNEL_CONFIG, + AUDIO_FORMAT, + BUFFER_SIZE + ) + + if (audioRecord?.state != AudioRecord.STATE_INITIALIZED) { + _state.value = RecordingState.Error("Failed to initialize AudioRecord") + return@flow + } + + audioRecord?.startRecording() + _state.value = RecordingState.Recording + audioBuffer.clear() + + Log.d(TAG, "Recording started") + + val buffer = ShortArray(BUFFER_SIZE / 2) + + while (coroutineContext.isActive && _state.value == RecordingState.Recording) { + val readCount = audioRecord?.read(buffer, 0, buffer.size) ?: -1 + + if (readCount > 0) { + val samples = buffer.copyOf(readCount) + audioBuffer.addAll(samples.toList()) + emit(samples) + } else if (readCount < 0) { + Log.e(TAG, "AudioRecord read error: $readCount") + break + } + } + + } catch (e: Exception) { + Log.e(TAG, "Recording error", e) + _state.value = RecordingState.Error(e.message ?: "Recording failed") + } + }.flowOn(Dispatchers.IO) + + /** + * Stop recording and return the complete audio buffer as float samples. 
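+     *
+     * The buffered 16-bit PCM samples are converted to floats normalized to [-1.0, 1.0]
+     * (sample / Short.MAX_VALUE), the format the Whisper engine expects for 16 kHz mono
+     * input; e.g. a raw value of 16384 maps to roughly 0.5.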
+ */ + suspend fun stopRecording(): FloatArray = withContext(Dispatchers.IO) { + try { + audioRecord?.stop() + audioRecord?.release() + audioRecord = null + + _state.value = RecordingState.Processing + + Log.d(TAG, "Recording stopped, ${audioBuffer.size} samples captured") + + // Convert short samples to float (-1.0 to 1.0) + val floatSamples = FloatArray(audioBuffer.size) + for (i in audioBuffer.indices) { + floatSamples[i] = audioBuffer[i].toFloat() / Short.MAX_VALUE + } + + audioBuffer.clear() + floatSamples + + } catch (e: Exception) { + Log.e(TAG, "Error stopping recording", e) + _state.value = RecordingState.Error(e.message ?: "Stop failed") + floatArrayOf() + } + } + + /** + * Cancel recording without processing. + */ + fun cancel() { + try { + audioRecord?.stop() + audioRecord?.release() + audioRecord = null + audioBuffer.clear() + _state.value = RecordingState.Idle + } catch (e: Exception) { + Log.e(TAG, "Error canceling recording", e) + } + } + + fun setIdle() { + _state.value = RecordingState.Idle + } + + /** + * Save audio buffer to WAV file for debugging. + */ + suspend fun saveToWav(file: File): Boolean = withContext(Dispatchers.IO) { + try { + val samples = audioBuffer.toShortArray() + val byteBuffer = ByteBuffer.allocate(samples.size * 2) + byteBuffer.order(ByteOrder.LITTLE_ENDIAN) + samples.forEach { byteBuffer.putShort(it) } + + FileOutputStream(file).use { fos -> + // Write WAV header + writeWavHeader(fos, samples.size * 2, SAMPLE_RATE, 1, 16) + // Write audio data + fos.write(byteBuffer.array()) + } + true + } catch (e: Exception) { + Log.e(TAG, "Failed to save WAV", e) + false + } + } + + private fun writeWavHeader( + out: FileOutputStream, + dataSize: Int, + sampleRate: Int, + channels: Int, + bitsPerSample: Int + ) { + val totalSize = dataSize + 36 + val byteRate = sampleRate * channels * bitsPerSample / 8 + val blockAlign = channels * bitsPerSample / 8 + + val header = ByteBuffer.allocate(44) + header.order(ByteOrder.LITTLE_ENDIAN) + + // RIFF header + header.put("RIFF".toByteArray()) + header.putInt(totalSize) + header.put("WAVE".toByteArray()) + + // fmt subchunk + header.put("fmt ".toByteArray()) + header.putInt(16) // Subchunk1Size + header.putShort(1) // AudioFormat (PCM) + header.putShort(channels.toShort()) + header.putInt(sampleRate) + header.putInt(byteRate) + header.putShort(blockAlign.toShort()) + header.putShort(bitsPerSample.toShort()) + + // data subchunk + header.put("data".toByteArray()) + header.putInt(dataSize) + + out.write(header.array()) + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/service/BootReceiver.kt b/app/src/main/java/com/jeffemmett/voicecommand/service/BootReceiver.kt new file mode 100644 index 0000000..e6606ac --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/service/BootReceiver.kt @@ -0,0 +1,33 @@ +package com.jeffemmett.voicecommand.service + +import android.content.BroadcastReceiver +import android.content.Context +import android.content.Intent +import android.util.Log +import androidx.datastore.preferences.core.booleanPreferencesKey +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.first +import kotlinx.coroutines.launch + +/** + * Receives boot completed broadcasts to restore floating button service if enabled. 
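+ *
+ * Note: ACTION_BOOT_COMPLETED is only delivered if the app holds the RECEIVE_BOOT_COMPLETED
+ * permission and this receiver is registered in the manifest; the restore logic below is
+ * currently a stub and does not auto-start the service.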
+ */ +class BootReceiver : BroadcastReceiver() { + + companion object { + private const val TAG = "BootReceiver" + } + + override fun onReceive(context: Context, intent: Intent) { + if (intent.action == Intent.ACTION_BOOT_COMPLETED || + intent.action == "android.intent.action.QUICKBOOT_POWERON" + ) { + Log.i(TAG, "Boot completed, checking if floating button should be started") + + // Check preferences and start service if enabled + // For now, we don't auto-start - user must enable manually + // This can be enhanced with DataStore preferences + } + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/service/FloatingButtonService.kt b/app/src/main/java/com/jeffemmett/voicecommand/service/FloatingButtonService.kt new file mode 100644 index 0000000..aaf3d88 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/service/FloatingButtonService.kt @@ -0,0 +1,206 @@ +package com.jeffemmett.voicecommand.service + +import android.app.Notification +import android.app.PendingIntent +import android.app.Service +import android.content.Intent +import android.graphics.PixelFormat +import android.os.Build +import android.os.IBinder +import android.view.Gravity +import android.view.MotionEvent +import android.view.WindowManager +import androidx.compose.foundation.background +import androidx.compose.foundation.gestures.detectDragGestures +import androidx.compose.foundation.gestures.detectTapGestures +import androidx.compose.foundation.layout.Box +import androidx.compose.foundation.layout.size +import androidx.compose.foundation.shape.CircleShape +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.Mic +import androidx.compose.material.icons.filled.Stop +import androidx.compose.material3.Icon +import androidx.compose.runtime.* +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.draw.clip +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.input.pointer.pointerInput +import androidx.compose.ui.platform.ComposeView +import androidx.compose.ui.unit.dp +import androidx.core.app.NotificationCompat +import androidx.lifecycle.Lifecycle +import androidx.lifecycle.LifecycleOwner +import androidx.lifecycle.LifecycleRegistry +import androidx.lifecycle.setViewTreeLifecycleOwner +import androidx.savedstate.SavedStateRegistry +import androidx.savedstate.SavedStateRegistryController +import androidx.savedstate.SavedStateRegistryOwner +import androidx.savedstate.setViewTreeSavedStateRegistryOwner +import com.jeffemmett.voicecommand.MainActivity +import com.jeffemmett.voicecommand.R +import com.jeffemmett.voicecommand.VoiceCommandApp +import com.jeffemmett.voicecommand.audio.AudioRecorder +import kotlinx.coroutines.* +import kotlinx.coroutines.flow.collect + +/** + * Service that displays a floating button overlay for quick voice recording. + */ +class FloatingButtonService : Service(), LifecycleOwner, SavedStateRegistryOwner { + + private lateinit var windowManager: WindowManager + private var floatingView: ComposeView? 
= null + private val serviceScope = CoroutineScope(Dispatchers.Main + SupervisorJob()) + + private lateinit var audioRecorder: AudioRecorder + private var isRecording = false + + // Lifecycle management for Compose + private val lifecycleRegistry = LifecycleRegistry(this) + private val savedStateRegistryController = SavedStateRegistryController.create(this) + + override val lifecycle: Lifecycle get() = lifecycleRegistry + override val savedStateRegistry: SavedStateRegistry + get() = savedStateRegistryController.savedStateRegistry + + override fun onCreate() { + super.onCreate() + savedStateRegistryController.performRestore(null) + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_CREATE) + + windowManager = getSystemService(WINDOW_SERVICE) as WindowManager + audioRecorder = AudioRecorder(this) + + createFloatingButton() + startForeground(NOTIFICATION_ID, createNotification()) + + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_START) + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_RESUME) + } + + private fun createFloatingButton() { + val params = WindowManager.LayoutParams( + WindowManager.LayoutParams.WRAP_CONTENT, + WindowManager.LayoutParams.WRAP_CONTENT, + WindowManager.LayoutParams.TYPE_APPLICATION_OVERLAY, + WindowManager.LayoutParams.FLAG_NOT_FOCUSABLE or + WindowManager.LayoutParams.FLAG_LAYOUT_IN_SCREEN, + PixelFormat.TRANSLUCENT + ).apply { + gravity = Gravity.TOP or Gravity.START + x = 50 + y = 300 + } + + floatingView = ComposeView(this).apply { + setViewTreeLifecycleOwner(this@FloatingButtonService) + setViewTreeSavedStateRegistryOwner(this@FloatingButtonService) + + setContent { + var recording by remember { mutableStateOf(false) } + var offsetX by remember { mutableFloatStateOf(0f) } + var offsetY by remember { mutableFloatStateOf(0f) } + + Box( + modifier = Modifier + .size(56.dp) + .clip(CircleShape) + .background(if (recording) Color.Red else Color(0xFF6200EE)) + .pointerInput(Unit) { + detectTapGestures( + onTap = { + recording = !recording + isRecording = recording + if (recording) { + startRecording() + } else { + stopRecording() + } + } + ) + } + .pointerInput(Unit) { + detectDragGestures { change, dragAmount -> + change.consume() + params.x += dragAmount.x.toInt() + params.y += dragAmount.y.toInt() + windowManager.updateViewLayout(floatingView, params) + } + }, + contentAlignment = Alignment.Center + ) { + Icon( + imageVector = if (recording) Icons.Default.Stop else Icons.Default.Mic, + contentDescription = if (recording) "Stop" else "Record", + tint = Color.White, + modifier = Modifier.size(24.dp) + ) + } + } + } + + windowManager.addView(floatingView, params) + } + + private fun startRecording() { + serviceScope.launch { + audioRecorder.startRecording().collect { /* streaming samples */ } + } + } + + private fun stopRecording() { + serviceScope.launch { + val samples = audioRecorder.stopRecording() + if (samples.isNotEmpty()) { + val engine = VoiceCommandApp.getInstance().transcriptionEngine + val result = engine.transcribe(samples) + if (result != null) { + // Launch result activity + val intent = Intent( + this@FloatingButtonService, + com.jeffemmett.voicecommand.ui.TranscriptionResultActivity::class.java + ).apply { + addFlags(Intent.FLAG_ACTIVITY_NEW_TASK) + putExtra("transcription", result) + } + startActivity(intent) + } + } + audioRecorder.setIdle() + isRecording = false + } + } + + private fun createNotification(): Notification { + val intent = Intent(this, MainActivity::class.java) + val pendingIntent = 
PendingIntent.getActivity( + this, 0, intent, + PendingIntent.FLAG_IMMUTABLE + ) + + return NotificationCompat.Builder(this, VoiceCommandApp.CHANNEL_OVERLAY) + .setContentTitle(getString(R.string.notification_overlay_title)) + .setContentText(getString(R.string.notification_overlay_text)) + .setSmallIcon(R.drawable.ic_mic) + .setContentIntent(pendingIntent) + .setOngoing(true) + .build() + } + + override fun onBind(intent: Intent?): IBinder? = null + + override fun onDestroy() { + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_PAUSE) + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_STOP) + lifecycleRegistry.handleLifecycleEvent(Lifecycle.Event.ON_DESTROY) + + floatingView?.let { windowManager.removeView(it) } + serviceScope.cancel() + super.onDestroy() + } + + companion object { + private const val NOTIFICATION_ID = 1001 + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/service/VoiceCommandTileService.kt b/app/src/main/java/com/jeffemmett/voicecommand/service/VoiceCommandTileService.kt new file mode 100644 index 0000000..5c64978 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/service/VoiceCommandTileService.kt @@ -0,0 +1,124 @@ +package com.jeffemmett.voicecommand.service + +import android.content.Intent +import android.graphics.drawable.Icon +import android.os.Build +import android.service.quicksettings.Tile +import android.service.quicksettings.TileService +import android.util.Log +import com.jeffemmett.voicecommand.R +import com.jeffemmett.voicecommand.VoiceCommandApp +import com.jeffemmett.voicecommand.audio.AudioRecorder +import kotlinx.coroutines.* +import kotlinx.coroutines.flow.collect + +/** + * Quick Settings tile for triggering voice recording from the notification shade. + */ +class VoiceCommandTileService : TileService() { + + companion object { + private const val TAG = "VoiceCommandTile" + } + + private val serviceScope = CoroutineScope(Dispatchers.Main + SupervisorJob()) + + private var audioRecorder: AudioRecorder? 
= null + private var isRecording = false + + override fun onStartListening() { + super.onStartListening() + updateTile() + } + + override fun onClick() { + super.onClick() + + if (audioRecorder == null) { + audioRecorder = AudioRecorder(this) + } + + if (!audioRecorder!!.hasPermission()) { + Log.w(TAG, "No microphone permission") + // Open app to request permission + val intent = Intent(this, com.jeffemmett.voicecommand.MainActivity::class.java).apply { + addFlags(Intent.FLAG_ACTIVITY_NEW_TASK) + } + startActivityAndCollapse(intent) + return + } + + if (isRecording) { + stopRecording() + } else { + startRecording() + } + } + + private fun startRecording() { + isRecording = true + updateTile() + + Log.i(TAG, "Starting recording from tile") + + serviceScope.launch { + audioRecorder?.startRecording()?.collect { /* streaming */ } + } + } + + private fun stopRecording() { + isRecording = false + updateTile() + + Log.i(TAG, "Stopping recording from tile") + + serviceScope.launch { + val samples = audioRecorder?.stopRecording() ?: floatArrayOf() + if (samples.isNotEmpty()) { + val engine = VoiceCommandApp.getInstance().transcriptionEngine + val result = engine.transcribe(samples) + if (result != null) { + showResult(result) + } + } + audioRecorder?.setIdle() + } + } + + private fun showResult(transcription: String) { + val intent = Intent( + this, + com.jeffemmett.voicecommand.ui.TranscriptionResultActivity::class.java + ).apply { + addFlags(Intent.FLAG_ACTIVITY_NEW_TASK) + putExtra("transcription", transcription) + } + startActivityAndCollapse(intent) + } + + private fun updateTile() { + qsTile?.let { tile -> + tile.state = if (isRecording) Tile.STATE_ACTIVE else Tile.STATE_INACTIVE + tile.label = if (isRecording) "Recording..." else "Voice Note" + tile.subtitle = if (isRecording) "Tap to stop" else "Tap to record" + + // Update icon based on state + tile.icon = Icon.createWithResource( + this, + if (isRecording) R.drawable.ic_stop else R.drawable.ic_mic + ) + + tile.updateTile() + } + } + + override fun onTileRemoved() { + super.onTileRemoved() + serviceScope.cancel() + } + + override fun onDestroy() { + serviceScope.cancel() + super.onDestroy() + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/service/VolumeButtonAccessibilityService.kt b/app/src/main/java/com/jeffemmett/voicecommand/service/VolumeButtonAccessibilityService.kt new file mode 100644 index 0000000..b576159 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/service/VolumeButtonAccessibilityService.kt @@ -0,0 +1,175 @@ +package com.jeffemmett.voicecommand.service + +import android.accessibilityservice.AccessibilityService +import android.content.Intent +import android.util.Log +import android.view.KeyEvent +import android.view.accessibility.AccessibilityEvent +import com.jeffemmett.voicecommand.VoiceCommandApp +import com.jeffemmett.voicecommand.audio.AudioRecorder +import kotlinx.coroutines.* +import kotlinx.coroutines.flow.collect + +/** + * Accessibility service that detects volume button combinations to trigger recording. 
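+ *
+ * onKeyEvent is only delivered when the service requests key-event filtering
+ * (android:canRequestFilterKeyEvents="true" plus flagRequestFilterKeyEvents in its
+ * accessibility configuration); this is assumed to be set in accessibility_service_config.xml.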
+ * + * Trigger: Press both Volume Up and Volume Down simultaneously + */ +class VolumeButtonAccessibilityService : AccessibilityService() { + + companion object { + private const val TAG = "VolumeButtonService" + private const val COMBO_TIMEOUT_MS = 300L + + var isServiceEnabled = false + private set + } + + private val serviceScope = CoroutineScope(Dispatchers.Main + SupervisorJob()) + + private var volumeUpPressed = false + private var volumeDownPressed = false + private var comboJob: Job? = null + + private lateinit var audioRecorder: AudioRecorder + private var isRecording = false + + override fun onCreate() { + super.onCreate() + audioRecorder = AudioRecorder(this) + isServiceEnabled = true + Log.i(TAG, "Accessibility service created") + } + + override fun onServiceConnected() { + super.onServiceConnected() + Log.i(TAG, "Accessibility service connected") + } + + override fun onAccessibilityEvent(event: AccessibilityEvent?) { + // We primarily use key events, not accessibility events + } + + override fun onKeyEvent(event: KeyEvent): Boolean { + when (event.keyCode) { + KeyEvent.KEYCODE_VOLUME_UP -> { + if (event.action == KeyEvent.ACTION_DOWN) { + volumeUpPressed = true + checkCombo() + } else if (event.action == KeyEvent.ACTION_UP) { + volumeUpPressed = false + } + } + KeyEvent.KEYCODE_VOLUME_DOWN -> { + if (event.action == KeyEvent.ACTION_DOWN) { + volumeDownPressed = true + checkCombo() + } else if (event.action == KeyEvent.ACTION_UP) { + volumeDownPressed = false + } + } + } + + // Don't consume the event - let volume buttons work normally + return false + } + + private fun checkCombo() { + if (volumeUpPressed && volumeDownPressed) { + // Cancel any pending combo timeout + comboJob?.cancel() + + // Volume combo detected! + Log.d(TAG, "Volume combo detected!") + toggleRecording() + + // Reset after a brief delay + comboJob = serviceScope.launch { + delay(COMBO_TIMEOUT_MS) + volumeUpPressed = false + volumeDownPressed = false + } + } + } + + private fun toggleRecording() { + if (isRecording) { + stopRecording() + } else { + startRecording() + } + } + + private fun startRecording() { + if (!audioRecorder.hasPermission()) { + Log.w(TAG, "No microphone permission") + return + } + + isRecording = true + Log.i(TAG, "Starting recording via volume combo") + + // Vibrate to indicate recording started + vibrate() + + serviceScope.launch { + audioRecorder.startRecording().collect { /* streaming */ } + } + } + + private fun stopRecording() { + isRecording = false + Log.i(TAG, "Stopping recording") + + // Vibrate to indicate recording stopped + vibrate() + + serviceScope.launch { + val samples = audioRecorder.stopRecording() + if (samples.isNotEmpty()) { + val engine = VoiceCommandApp.getInstance().transcriptionEngine + val result = engine.transcribe(samples) + if (result != null) { + showResult(result) + } + } + audioRecorder.setIdle() + } + } + + private fun showResult(transcription: String) { + val intent = Intent( + this, + com.jeffemmett.voicecommand.ui.TranscriptionResultActivity::class.java + ).apply { + addFlags(Intent.FLAG_ACTIVITY_NEW_TASK) + putExtra("transcription", transcription) + } + startActivity(intent) + } + + private fun vibrate() { + try { + val vibrator = getSystemService(android.os.Vibrator::class.java) + vibrator?.vibrate( + android.os.VibrationEffect.createOneShot( + 100, + android.os.VibrationEffect.DEFAULT_AMPLITUDE + ) + ) + } catch (e: Exception) { + Log.w(TAG, "Vibration failed", e) + } + } + + override fun onInterrupt() { + Log.w(TAG, "Accessibility service 
interrupted") + } + + override fun onDestroy() { + isServiceEnabled = false + serviceScope.cancel() + super.onDestroy() + Log.i(TAG, "Accessibility service destroyed") + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/stt/SherpaTranscriptionEngine.kt b/app/src/main/java/com/jeffemmett/voicecommand/stt/SherpaTranscriptionEngine.kt new file mode 100644 index 0000000..fd7c212 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/stt/SherpaTranscriptionEngine.kt @@ -0,0 +1,270 @@ +package com.jeffemmett.voicecommand.stt + +import android.content.Context +import android.util.Log +import com.k2fsa.sherpa.onnx.OfflineRecognizer +import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig +import com.k2fsa.sherpa.onnx.OfflineWhisperModelConfig +import com.k2fsa.sherpa.onnx.OfflineModelConfig +import com.k2fsa.sherpa.onnx.getOfflineModelConfig +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.flow.MutableStateFlow +import kotlinx.coroutines.flow.StateFlow +import kotlinx.coroutines.flow.asStateFlow +import kotlinx.coroutines.withContext +import java.io.File +import java.io.FileOutputStream + +/** + * Sherpa-ONNX based transcription engine for on-device speech recognition. + * Uses Whisper models for high-quality transcription. + */ +class SherpaTranscriptionEngine(private val context: Context) { + + companion object { + private const val TAG = "SherpaTranscription" + + // Available models (from smallest to largest) + enum class WhisperModel( + val displayName: String, + val encoder: String, + val decoder: String, + val sizeBytes: Long // Approximate download size + ) { + TINY_EN( + "Tiny English", + "tiny.en-encoder.int8.onnx", + "tiny.en-decoder.int8.onnx", + 40_000_000L // ~40MB + ), + BASE_EN( + "Base English", + "base.en-encoder.int8.onnx", + "base.en-decoder.int8.onnx", + 75_000_000L // ~75MB + ), + SMALL_EN( + "Small English", + "small.en-encoder.int8.onnx", + "small.en-decoder.int8.onnx", + 250_000_000L // ~250MB + ), + TINY( + "Tiny Multilingual", + "tiny-encoder.int8.onnx", + "tiny-decoder.int8.onnx", + 40_000_000L + ), + BASE( + "Base Multilingual", + "base-encoder.int8.onnx", + "base-decoder.int8.onnx", + 75_000_000L + ), + SMALL( + "Small Multilingual", + "small-encoder.int8.onnx", + "small-decoder.int8.onnx", + 250_000_000L + ) + } + + // Model download base URL + private const val MODEL_BASE_URL = + "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/" + } + + sealed class EngineState { + data object NotInitialized : EngineState() + data class Downloading(val progress: Float, val modelName: String) : EngineState() + data object Initializing : EngineState() + data object Ready : EngineState() + data class Error(val message: String) : EngineState() + } + + private val _state = MutableStateFlow(EngineState.NotInitialized) + val state: StateFlow = _state.asStateFlow() + + private var recognizer: OfflineRecognizer? = null + private var currentModel: WhisperModel = WhisperModel.TINY_EN + + private val modelsDir: File + get() = File(context.filesDir, "models").also { it.mkdirs() } + + /** + * Initialize the transcription engine with the specified model. 
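+     *
+     * Flow: ensure the encoder/decoder .onnx files exist under filesDir/models (copying them
+     * from bundled assets when available), ensure tokens.txt is present, then build an
+     * OfflineRecognizer configured for greedy_search decoding.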
+ */ + suspend fun initialize(model: WhisperModel = WhisperModel.TINY_EN): Boolean = + withContext(Dispatchers.IO) { + try { + currentModel = model + _state.value = EngineState.Initializing + + // Check if model files exist, download if needed + val encoderFile = File(modelsDir, model.encoder) + val decoderFile = File(modelsDir, model.decoder) + val tokensFile = File(modelsDir, "tokens.txt") + + if (!encoderFile.exists() || !decoderFile.exists()) { + if (!downloadModel(model)) { + _state.value = EngineState.Error("Failed to download model") + return@withContext false + } + } + + // Ensure tokens file exists + if (!tokensFile.exists()) { + extractTokensFromAssets() + } + + // Create recognizer config + val config = createRecognizerConfig(model) + + recognizer = OfflineRecognizer(config) + + _state.value = EngineState.Ready + Log.i(TAG, "Transcription engine initialized with model: ${model.displayName}") + true + + } catch (e: Exception) { + Log.e(TAG, "Failed to initialize transcription engine", e) + _state.value = EngineState.Error(e.message ?: "Initialization failed") + false + } + } + + private fun createRecognizerConfig(model: WhisperModel): OfflineRecognizerConfig { + val whisperConfig = OfflineWhisperModelConfig( + encoder = File(modelsDir, model.encoder).absolutePath, + decoder = File(modelsDir, model.decoder).absolutePath, + language = if (model.name.endsWith("_EN")) "en" else "", + task = "transcribe" + ) + + val modelConfig = OfflineModelConfig( + whisper = whisperConfig, + tokens = File(modelsDir, "tokens.txt").absolutePath, + numThreads = Runtime.getRuntime().availableProcessors().coerceAtMost(4), + debug = false + ) + + return OfflineRecognizerConfig( + modelConfig = modelConfig, + decodingMethod = "greedy_search" + ) + } + + private suspend fun downloadModel(model: WhisperModel): Boolean { + _state.value = EngineState.Downloading(0f, model.displayName) + + // For now, copy from assets if bundled, otherwise return error + // In production, you'd download from MODEL_BASE_URL + return try { + // Try to copy from assets first (for bundled models) + copyModelFromAssets(model) + } catch (e: Exception) { + Log.e(TAG, "Model not bundled and download not implemented", e) + false + } + } + + private fun copyModelFromAssets(model: WhisperModel): Boolean { + return try { + val assetManager = context.assets + + // Copy encoder + assetManager.open("models/${model.encoder}").use { input -> + FileOutputStream(File(modelsDir, model.encoder)).use { output -> + input.copyTo(output) + } + } + + // Copy decoder + assetManager.open("models/${model.decoder}").use { input -> + FileOutputStream(File(modelsDir, model.decoder)).use { output -> + input.copyTo(output) + } + } + + _state.value = EngineState.Downloading(1f, model.displayName) + true + } catch (e: Exception) { + Log.w(TAG, "Model not found in assets: ${model.encoder}") + false + } + } + + private fun extractTokensFromAssets() { + try { + context.assets.open("models/tokens.txt").use { input -> + FileOutputStream(File(modelsDir, "tokens.txt")).use { output -> + input.copyTo(output) + } + } + } catch (e: Exception) { + Log.w(TAG, "Tokens file not found in assets, will use default") + } + } + + /** + * Transcribe audio samples to text. + * @param samples Float array of audio samples at 16kHz mono + * @return Transcribed text, or null if failed + */ + suspend fun transcribe(samples: FloatArray): String? 
= withContext(Dispatchers.Default) { + val rec = recognizer + if (rec == null) { + Log.e(TAG, "Recognizer not initialized") + return@withContext null + } + + if (samples.isEmpty()) { + Log.w(TAG, "Empty audio samples") + return@withContext null + } + + try { + Log.d(TAG, "Transcribing ${samples.size} samples (${samples.size / 16000f} seconds)") + + val startTime = System.currentTimeMillis() + + // Create a stream and decode + val stream = rec.createStream() + stream.acceptWaveform(samples, 16000) + + rec.decode(stream) + + val result = rec.getResult(stream).text.trim() + + val duration = System.currentTimeMillis() - startTime + Log.i(TAG, "Transcription completed in ${duration}ms: \"$result\"") + + stream.release() + + result.ifEmpty { null } + + } catch (e: Exception) { + Log.e(TAG, "Transcription failed", e) + null + } + } + + /** + * Check if the engine is ready for transcription. + */ + fun isReady(): Boolean = _state.value == EngineState.Ready + + /** + * Get the current model being used. + */ + fun getCurrentModel(): WhisperModel = currentModel + + /** + * Release resources. + */ + fun release() { + recognizer?.release() + recognizer = null + _state.value = EngineState.NotInitialized + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/ui/RecordingScreen.kt b/app/src/main/java/com/jeffemmett/voicecommand/ui/RecordingScreen.kt new file mode 100644 index 0000000..3f7a96e --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/ui/RecordingScreen.kt @@ -0,0 +1,301 @@ +package com.jeffemmett.voicecommand.ui + +import androidx.compose.animation.core.* +import androidx.compose.foundation.background +import androidx.compose.foundation.clickable +import androidx.compose.foundation.layout.* +import androidx.compose.foundation.shape.CircleShape +import androidx.compose.foundation.shape.RoundedCornerShape +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.* +import androidx.compose.material3.* +import androidx.compose.runtime.* +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.draw.clip +import androidx.compose.ui.draw.scale +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.text.style.TextAlign +import androidx.compose.ui.unit.dp +import androidx.lifecycle.compose.collectAsStateWithLifecycle +import com.jeffemmett.voicecommand.action.ActionRouter +import com.jeffemmett.voicecommand.audio.AudioRecorder +import com.jeffemmett.voicecommand.stt.SherpaTranscriptionEngine +import kotlinx.coroutines.flow.collect +import kotlinx.coroutines.launch + +@Composable +fun RecordingScreen( + audioRecorder: AudioRecorder, + transcriptionEngine: SherpaTranscriptionEngine, + modifier: Modifier = Modifier +) { + val context = LocalContext.current + val scope = rememberCoroutineScope() + + val recordingState by audioRecorder.state.collectAsStateWithLifecycle() + var transcriptionResult by remember { mutableStateOf<String?>(null) } + var showActionMenu by remember { mutableStateOf(false) } + + Column( + modifier = modifier.fillMaxWidth(), + horizontalAlignment = Alignment.CenterHorizontally + ) { + // Recording Button + RecordingButton( + isRecording = recordingState is AudioRecorder.RecordingState.Recording, + isProcessing = recordingState is AudioRecorder.RecordingState.Processing, + onClick = { + scope.launch { + when (recordingState) { + is AudioRecorder.RecordingState.Idle -> { + // Start recording +
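// Note: startRecording() exposes the captured audio as a Flow of streaming samples; + // collecting it here keeps capture running until stopRecording() (in the Recording + // branch below) returns the full sample buffer handed to the transcription engine. +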
audioRecorder.startRecording().collect { /* streaming samples */ } + } + is AudioRecorder.RecordingState.Recording -> { + // Stop and transcribe + val samples = audioRecorder.stopRecording() + if (samples.isNotEmpty()) { + val result = transcriptionEngine.transcribe(samples) + if (result != null) { + transcriptionResult = result + showActionMenu = true + } + } + audioRecorder.setIdle() + } + else -> { /* ignore during processing */ } + } + } + } + ) + + Spacer(modifier = Modifier.height(16.dp)) + + // Status Text + Text( + text = when (recordingState) { + is AudioRecorder.RecordingState.Idle -> "Tap to start recording" + is AudioRecorder.RecordingState.Recording -> "Recording... Tap to stop" + is AudioRecorder.RecordingState.Processing -> "Processing..." + is AudioRecorder.RecordingState.Error -> + (recordingState as AudioRecorder.RecordingState.Error).message + }, + style = MaterialTheme.typography.bodyLarge, + textAlign = TextAlign.Center + ) + + // Transcription Result + transcriptionResult?.let { text -> + Spacer(modifier = Modifier.height(24.dp)) + TranscriptionResultCard( + text = text, + onDismiss = { + transcriptionResult = null + showActionMenu = false + } + ) + } + + // Action Menu + if (showActionMenu && transcriptionResult != null) { + Spacer(modifier = Modifier.height(16.dp)) + ActionMenu( + text = transcriptionResult!!, + onActionComplete = { + showActionMenu = false + } + ) + } + } +} + +@Composable +fun RecordingButton( + isRecording: Boolean, + isProcessing: Boolean, + onClick: () -> Unit +) { + val infiniteTransition = rememberInfiniteTransition(label = "pulse") + val scale by infiniteTransition.animateFloat( + initialValue = 1f, + targetValue = if (isRecording) 1.15f else 1f, + animationSpec = infiniteRepeatable( + animation = tween(600, easing = EaseInOut), + repeatMode = RepeatMode.Reverse + ), + label = "scale" + ) + + Box( + modifier = Modifier + .size(120.dp) + .scale(if (isRecording) scale else 1f) + .clip(CircleShape) + .background( + when { + isProcessing -> MaterialTheme.colorScheme.surfaceVariant + isRecording -> MaterialTheme.colorScheme.error + else -> MaterialTheme.colorScheme.primary + } + ) + .clickable(enabled = !isProcessing) { onClick() }, + contentAlignment = Alignment.Center + ) { + if (isProcessing) { + CircularProgressIndicator( + modifier = Modifier.size(48.dp), + color = MaterialTheme.colorScheme.primary + ) + } else { + Icon( + imageVector = if (isRecording) Icons.Default.Stop else Icons.Default.Mic, + contentDescription = if (isRecording) "Stop" else "Record", + modifier = Modifier.size(48.dp), + tint = Color.White + ) + } + } +} + +@Composable +fun TranscriptionResultCard( + text: String, + onDismiss: () -> Unit +) { + Card( + modifier = Modifier.fillMaxWidth(), + colors = CardDefaults.cardColors( + containerColor = MaterialTheme.colorScheme.secondaryContainer + ) + ) { + Column(modifier = Modifier.padding(16.dp)) { + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically + ) { + Text( + "Transcription", + style = MaterialTheme.typography.labelMedium, + color = MaterialTheme.colorScheme.onSecondaryContainer + ) + IconButton(onClick = onDismiss, modifier = Modifier.size(24.dp)) { + Icon(Icons.Default.Close, "Close", modifier = Modifier.size(16.dp)) + } + } + Spacer(modifier = Modifier.height(8.dp)) + Text( + text = text, + style = MaterialTheme.typography.bodyLarge, + color = MaterialTheme.colorScheme.onSecondaryContainer + ) + } + } +} + +@Composable +fun 
ActionMenu( + text: String, + onActionComplete: () -> Unit +) { + val context = LocalContext.current + val actionRouter = remember { ActionRouter(context) } + val analysis = remember(text) { actionRouter.analyzeIntent(text) } + + Card(modifier = Modifier.fillMaxWidth()) { + Column(modifier = Modifier.padding(16.dp)) { + Text( + "Actions", + style = MaterialTheme.typography.labelMedium, + modifier = Modifier.padding(bottom = 8.dp) + ) + + Text( + "Detected: ${analysis.intent.name.lowercase()} | ${analysis.title}", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant, + modifier = Modifier.padding(bottom = 12.dp) + ) + + // Action buttons in a flow row + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + ActionButton( + icon = Icons.Default.ContentCopy, + label = "Copy", + highlighted = analysis.suggestedAction is ActionRouter.Action.Copy, + onClick = { + actionRouter.copyToClipboard(text) + onActionComplete() + } + ) + + ActionButton( + icon = Icons.Default.Share, + label = "Share", + highlighted = analysis.suggestedAction is ActionRouter.Action.Share, + onClick = { + actionRouter.share(text, analysis.title) + onActionComplete() + } + ) + } + + Spacer(modifier = Modifier.height(8.dp)) + + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + ActionButton( + icon = Icons.Default.NoteAdd, + label = "Save Note", + highlighted = analysis.suggestedAction is ActionRouter.Action.SaveNote, + onClick = { + actionRouter.saveAsNote(text, analysis.title) + onActionComplete() + } + ) + + ActionButton( + icon = Icons.Default.TaskAlt, + label = "Create Task", + highlighted = analysis.suggestedAction is ActionRouter.Action.CreateTask, + onClick = { + actionRouter.createTask(text, analysis.title, analysis.priority) + onActionComplete() + } + ) + } + } + } +} + +@Composable +fun RowScope.ActionButton( + icon: androidx.compose.ui.graphics.vector.ImageVector, + label: String, + highlighted: Boolean, + onClick: () -> Unit +) { + Button( + onClick = onClick, + modifier = Modifier.weight(1f), + colors = if (highlighted) { + ButtonDefaults.buttonColors() + } else { + ButtonDefaults.outlinedButtonColors() + }, + border = if (!highlighted) { + ButtonDefaults.outlinedButtonBorder(true) + } else null + ) { + Icon(icon, null, modifier = Modifier.size(18.dp)) + Spacer(Modifier.width(4.dp)) + Text(label, style = MaterialTheme.typography.labelMedium) + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/ui/TranscriptionResultActivity.kt b/app/src/main/java/com/jeffemmett/voicecommand/ui/TranscriptionResultActivity.kt new file mode 100644 index 0000000..5227bcb --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/ui/TranscriptionResultActivity.kt @@ -0,0 +1,221 @@ +package com.jeffemmett.voicecommand.ui + +import android.os.Bundle +import androidx.activity.ComponentActivity +import androidx.activity.compose.setContent +import androidx.compose.foundation.layout.* +import androidx.compose.material.icons.Icons +import androidx.compose.material.icons.filled.* +import androidx.compose.material3.* +import androidx.compose.runtime.* +import androidx.compose.ui.Alignment +import androidx.compose.ui.Modifier +import androidx.compose.ui.platform.LocalContext +import androidx.compose.ui.unit.dp +import com.jeffemmett.voicecommand.action.ActionRouter +import com.jeffemmett.voicecommand.ui.theme.VoiceCommandTheme + +/** + * Dialog-style activity to show 
transcription results and action menu. + * Used when recording is triggered from overlay or accessibility service. + */ +class TranscriptionResultActivity : ComponentActivity() { + + override fun onCreate(savedInstanceState: Bundle?) { + super.onCreate(savedInstanceState) + + val transcription = intent.getStringExtra("transcription") ?: "" + + if (transcription.isEmpty()) { + finish() + return + } + + setContent { + VoiceCommandTheme { + TranscriptionResultDialog( + transcription = transcription, + onDismiss = { finish() } + ) + } + } + } +} + +@OptIn(ExperimentalMaterial3Api::class) +@Composable +fun TranscriptionResultDialog( + transcription: String, + onDismiss: () -> Unit +) { + val context = LocalContext.current + val actionRouter = remember { ActionRouter(context) } + val analysis = remember(transcription) { actionRouter.analyzeIntent(transcription) } + + Surface( + modifier = Modifier.fillMaxSize(), + color = MaterialTheme.colorScheme.scrim.copy(alpha = 0.5f) + ) { + Card( + modifier = Modifier + .fillMaxWidth() + .padding(16.dp), + ) { + Column( + modifier = Modifier.padding(20.dp) + ) { + // Header + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.SpaceBetween, + verticalAlignment = Alignment.CenterVertically + ) { + Text( + "Transcription", + style = MaterialTheme.typography.titleLarge + ) + IconButton(onClick = onDismiss) { + Icon(Icons.Default.Close, "Close") + } + } + + Spacer(modifier = Modifier.height(12.dp)) + + // Transcription text + Card( + colors = CardDefaults.cardColors( + containerColor = MaterialTheme.colorScheme.surfaceVariant + ) + ) { + Text( + text = transcription, + style = MaterialTheme.typography.bodyLarge, + modifier = Modifier.padding(16.dp) + ) + } + + Spacer(modifier = Modifier.height(8.dp)) + + // Analysis info + Text( + "Detected: ${analysis.intent.name.lowercase()} | Priority: ${analysis.priority}", + style = MaterialTheme.typography.bodySmall, + color = MaterialTheme.colorScheme.onSurfaceVariant + ) + + Spacer(modifier = Modifier.height(20.dp)) + + // Action buttons + Text( + "Quick Actions", + style = MaterialTheme.typography.titleMedium, + modifier = Modifier.padding(bottom = 12.dp) + ) + + // Row 1: Copy and Share + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + OutlinedButton( + onClick = { + actionRouter.copyToClipboard(transcription) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + Icon(Icons.Default.ContentCopy, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Copy") + } + + OutlinedButton( + onClick = { + actionRouter.share(transcription, analysis.title) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + Icon(Icons.Default.Share, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Share") + } + } + + Spacer(modifier = Modifier.height(8.dp)) + + // Row 2: Save Note and Create Task + Row( + modifier = Modifier.fillMaxWidth(), + horizontalArrangement = Arrangement.spacedBy(8.dp) + ) { + val isNoteSuggested = analysis.suggestedAction is ActionRouter.Action.SaveNote + + if (isNoteSuggested) { + Button( + onClick = { + actionRouter.saveAsNote(transcription, analysis.title) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + Icon(Icons.Default.NoteAdd, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Save Note") + } + } else { + OutlinedButton( + onClick = { + actionRouter.saveAsNote(transcription, analysis.title) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + 
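// Same action handler as the suggested (filled) button variant above; only the styling differs. +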
Icon(Icons.Default.NoteAdd, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Save Note") + } + } + + val isTaskSuggested = analysis.suggestedAction is ActionRouter.Action.CreateTask + + if (isTaskSuggested) { + Button( + onClick = { + actionRouter.createTask(transcription, analysis.title, analysis.priority) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + Icon(Icons.Default.TaskAlt, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Create Task") + } + } else { + OutlinedButton( + onClick = { + actionRouter.createTask(transcription, analysis.title, analysis.priority) + onDismiss() + }, + modifier = Modifier.weight(1f) + ) { + Icon(Icons.Default.TaskAlt, null, Modifier.size(18.dp)) + Spacer(Modifier.width(6.dp)) + Text("Create Task") + } + } + } + + Spacer(modifier = Modifier.height(16.dp)) + + // Dismiss button + TextButton( + onClick = onDismiss, + modifier = Modifier.align(Alignment.CenterHorizontally) + ) { + Text("Dismiss") + } + } + } + } +} diff --git a/app/src/main/java/com/jeffemmett/voicecommand/ui/theme/Theme.kt b/app/src/main/java/com/jeffemmett/voicecommand/ui/theme/Theme.kt new file mode 100644 index 0000000..19b70a4 --- /dev/null +++ b/app/src/main/java/com/jeffemmett/voicecommand/ui/theme/Theme.kt @@ -0,0 +1,70 @@ +package com.jeffemmett.voicecommand.ui.theme + +import android.os.Build +import androidx.compose.foundation.isSystemInDarkTheme +import androidx.compose.material3.* +import androidx.compose.runtime.Composable +import androidx.compose.ui.graphics.Color +import androidx.compose.ui.platform.LocalContext + +private val DarkColorScheme = darkColorScheme( + primary = Color(0xFFBB86FC), + onPrimary = Color(0xFF000000), + primaryContainer = Color(0xFF3700B3), + onPrimaryContainer = Color(0xFFFFFFFF), + secondary = Color(0xFF03DAC6), + onSecondary = Color(0xFF000000), + secondaryContainer = Color(0xFF018786), + onSecondaryContainer = Color(0xFFFFFFFF), + tertiary = Color(0xFFCF6679), + error = Color(0xFFCF6679), + errorContainer = Color(0xFF93000A), + background = Color(0xFF121212), + onBackground = Color(0xFFE1E1E1), + surface = Color(0xFF1E1E1E), + onSurface = Color(0xFFE1E1E1), + surfaceVariant = Color(0xFF2D2D2D), + onSurfaceVariant = Color(0xFFCACACA), +) + +private val LightColorScheme = lightColorScheme( + primary = Color(0xFF6200EE), + onPrimary = Color(0xFFFFFFFF), + primaryContainer = Color(0xFFE8DEF8), + onPrimaryContainer = Color(0xFF21005D), + secondary = Color(0xFF03DAC6), + onSecondary = Color(0xFF000000), + secondaryContainer = Color(0xFFCEFAF8), + onSecondaryContainer = Color(0xFF002020), + tertiary = Color(0xFF7D5260), + error = Color(0xFFB3261E), + errorContainer = Color(0xFFF9DEDC), + background = Color(0xFFFFFBFE), + onBackground = Color(0xFF1C1B1F), + surface = Color(0xFFFFFBFE), + onSurface = Color(0xFF1C1B1F), + surfaceVariant = Color(0xFFE7E0EC), + onSurfaceVariant = Color(0xFF49454F), +) + +@Composable +fun VoiceCommandTheme( + darkTheme: Boolean = isSystemInDarkTheme(), + dynamicColor: Boolean = true, + content: @Composable () -> Unit +) { + val colorScheme = when { + dynamicColor && Build.VERSION.SDK_INT >= Build.VERSION_CODES.S -> { + val context = LocalContext.current + if (darkTheme) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context) + } + darkTheme -> DarkColorScheme + else -> LightColorScheme + } + + MaterialTheme( + colorScheme = colorScheme, + typography = Typography(), + content = content + ) +} diff --git a/app/src/main/res/drawable/ic_mic.xml 
b/app/src/main/res/drawable/ic_mic.xml new file mode 100644 index 0000000..aedc035 --- /dev/null +++ b/app/src/main/res/drawable/ic_mic.xml @@ -0,0 +1,11 @@ + + + + diff --git a/app/src/main/res/drawable/ic_stop.xml b/app/src/main/res/drawable/ic_stop.xml new file mode 100644 index 0000000..f726885 --- /dev/null +++ b/app/src/main/res/drawable/ic_stop.xml @@ -0,0 +1,11 @@ + + + + diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml new file mode 100644 index 0000000..a1404b7 --- /dev/null +++ b/app/src/main/res/values/strings.xml @@ -0,0 +1,39 @@ + + + Voice Command + Voice Note + Enables voice recording with volume button shortcuts. Press Volume Up + Volume Down to start recording. + + + Tap to start recording + Recording\u2026 Tap to stop + Processing\u2026 + + + Copy + Share + Save Note + Create Task + Cancel + + + Settings + Whisper Model + Floating Button + Volume Button Trigger + Quick Settings Tile + Notes Folder + + + Voice Recording + Recording voice + Tap to stop recording + Floating Button + Voice Command Active + Floating button is ready + + + Downloading model\u2026 + Model ready + Failed to load model + diff --git a/app/src/main/res/values/themes.xml b/app/src/main/res/values/themes.xml new file mode 100644 index 0000000..d9f903f --- /dev/null +++ b/app/src/main/res/values/themes.xml @@ -0,0 +1,14 @@ + + + + + + diff --git a/app/src/main/res/xml/accessibility_service_config.xml b/app/src/main/res/xml/accessibility_service_config.xml new file mode 100644 index 0000000..b723fc1 --- /dev/null +++ b/app/src/main/res/xml/accessibility_service_config.xml @@ -0,0 +1,10 @@ + + diff --git a/backlog/config.yml b/backlog/config.yml new file mode 100644 index 0000000..6399723 --- /dev/null +++ b/backlog/config.yml @@ -0,0 +1,13 @@ +project_name: "Voice Command Android" +default_status: "To Do" +statuses: ["To Do", "In Progress", "Done"] +labels: [android, ui, audio, stt, release] +milestones: [] +date_format: yyyy-mm-dd +max_column_width: 20 +auto_open_browser: true +default_port: 6420 +remote_operations: true +auto_commit: false +zero_padded_ids: 3 +bypass_git_hooks: false diff --git a/backlog/tasks/task-001 - Download-and-bundle-Whisper-model.md b/backlog/tasks/task-001 - Download-and-bundle-Whisper-model.md new file mode 100644 index 0000000..f70b093 --- /dev/null +++ b/backlog/tasks/task-001 - Download-and-bundle-Whisper-model.md @@ -0,0 +1,27 @@ +--- +id: task-001 +title: Download and bundle Whisper model +status: To Do +assignee: [] +created_date: '2025-12-07' +labels: [stt, release] +priority: high +dependencies: [] +--- + +## Description + +Download Whisper model files from sherpa-onnx releases and bundle with the app. + +## Plan + +1. Run `./download-models.sh` to fetch tiny.en model +2. Verify model files in `app/src/main/assets/models/` +3. 
Test model loading in emulator + +## Acceptance Criteria + +- [ ] tiny.en-encoder.int8.onnx downloaded +- [ ] tiny.en-decoder.int8.onnx downloaded +- [ ] tokens.txt downloaded +- [ ] Model loads successfully at runtime diff --git a/backlog/tasks/task-002 - Build-and-test-debug-APK.md b/backlog/tasks/task-002 - Build-and-test-debug-APK.md new file mode 100644 index 0000000..7b39e26 --- /dev/null +++ b/backlog/tasks/task-002 - Build-and-test-debug-APK.md @@ -0,0 +1,36 @@ +--- +id: task-002 +title: Build and test debug APK +status: To Do +assignee: [] +created_date: '2025-12-07' +labels: [android, release] +priority: high +dependencies: [task-001] +--- + +## Description + +Build debug APK and test all features on a real Android device. + +## Plan + +1. Run `./gradlew assembleDebug` +2. Install APK on test device +3. Test each feature systematically +4. Fix any runtime issues + +## Acceptance Criteria + +- [ ] APK builds without errors +- [ ] App installs and launches +- [ ] Microphone permission request works +- [ ] Audio recording captures speech +- [ ] Transcription produces text output +- [ ] Floating button overlay works +- [ ] Volume button trigger works +- [ ] Quick Settings tile works +- [ ] Copy to clipboard works +- [ ] Share intent works +- [ ] Save note creates markdown file +- [ ] Create task creates backlog-compatible file diff --git a/build.gradle.kts b/build.gradle.kts new file mode 100644 index 0000000..af41a67 --- /dev/null +++ b/build.gradle.kts @@ -0,0 +1,6 @@ +// Top-level build file +plugins { + alias(libs.plugins.android.application) apply false + alias(libs.plugins.kotlin.android) apply false + alias(libs.plugins.kotlin.compose) apply false +} diff --git a/download-models.sh b/download-models.sh new file mode 100755 index 0000000..7959f81 --- /dev/null +++ b/download-models.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# Download Whisper models for bundling with the Android app + +set -e + +MODEL_DIR="app/src/main/assets/models" +mkdir -p "$MODEL_DIR" + +echo "Downloading Whisper models for sherpa-onnx..." + +# Base URL for sherpa-onnx models +BASE_URL="https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models" + +# Download tiny.en model (smallest, English only, ~40MB total) +echo "Downloading tiny.en model..." +curl -L -o "$MODEL_DIR/tiny.en-encoder.int8.onnx" \ + "$BASE_URL/sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx" +curl -L -o "$MODEL_DIR/tiny.en-decoder.int8.onnx" \ + "$BASE_URL/sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx" +curl -L -o "$MODEL_DIR/tokens.txt" \ + "$BASE_URL/sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt" + +# Optional: Download base.en model (~75MB total) +# echo "Downloading base.en model..." +# curl -L -o "$MODEL_DIR/base.en-encoder.int8.onnx" \ +# "$BASE_URL/sherpa-onnx-whisper-base.en/base.en-encoder.int8.onnx" +# curl -L -o "$MODEL_DIR/base.en-decoder.int8.onnx" \ +# "$BASE_URL/sherpa-onnx-whisper-base.en/base.en-decoder.int8.onnx" + +echo "" +echo "Models downloaded to $MODEL_DIR/" +ls -lh "$MODEL_DIR/" + +echo "" +echo "Next steps:" +echo "1. Build the APK: ./gradlew assembleDebug" +echo "2. 
Install on device: adb install app/build/outputs/apk/debug/app-debug.apk" diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml new file mode 100644 index 0000000..b47cf01 --- /dev/null +++ b/gradle/libs.versions.toml @@ -0,0 +1,31 @@ +[versions] +agp = "8.7.2" +kotlin = "2.0.21" +coreKtx = "1.15.0" +lifecycleRuntimeKtx = "2.8.7" +activityCompose = "1.9.3" +composeBom = "2024.11.00" +sherpaOnnx = "1.10.32" +coroutines = "1.9.0" +datastore = "1.1.1" + +[libraries] +androidx-core-ktx = { group = "androidx.core", name = "core-ktx", version.ref = "coreKtx" } +androidx-lifecycle-runtime-ktx = { group = "androidx.lifecycle", name = "lifecycle-runtime-ktx", version.ref = "lifecycleRuntimeKtx" } +androidx-lifecycle-viewmodel-compose = { group = "androidx.lifecycle", name = "lifecycle-viewmodel-compose", version.ref = "lifecycleRuntimeKtx" } +androidx-activity-compose = { group = "androidx.activity", name = "activity-compose", version.ref = "activityCompose" } +androidx-compose-bom = { group = "androidx.compose", name = "compose-bom", version.ref = "composeBom" } +androidx-ui = { group = "androidx.compose.ui", name = "ui" } +androidx-ui-graphics = { group = "androidx.compose.ui", name = "ui-graphics" } +androidx-ui-tooling = { group = "androidx.compose.ui", name = "ui-tooling" } +androidx-ui-tooling-preview = { group = "androidx.compose.ui", name = "ui-tooling-preview" } +androidx-material3 = { group = "androidx.compose.material3", name = "material3" } +androidx-material-icons-extended = { group = "androidx.compose.material", name = "material-icons-extended" } +kotlinx-coroutines-android = { group = "org.jetbrains.kotlinx", name = "kotlinx-coroutines-android", version.ref = "coroutines" } +androidx-datastore-preferences = { group = "androidx.datastore", name = "datastore-preferences", version.ref = "datastore" } +sherpa-onnx = { group = "com.github.k2-fsa", name = "sherpa-onnx-android", version.ref = "sherpaOnnx" } + +[plugins] +android-application = { id = "com.android.application", version.ref = "agp" } +kotlin-android = { id = "org.jetbrains.kotlin.android", version.ref = "kotlin" } +kotlin-compose = { id = "org.jetbrains.kotlin.plugin.compose", version.ref = "kotlin" } diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000..09523c0 --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,7 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.9-bin.zip +networkTimeout=10000 +validateDistributionUrl=true +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/settings.gradle.kts b/settings.gradle.kts new file mode 100644 index 0000000..e92abe6 --- /dev/null +++ b/settings.gradle.kts @@ -0,0 +1,19 @@ +pluginManagement { + repositories { + google() + mavenCentral() + gradlePluginPortal() + } +} + +dependencyResolutionManagement { + repositoriesMode.set(RepositoriesMode.FAIL_ON_PROJECT_REPOS) + repositories { + google() + mavenCentral() + maven { url = uri("https://jitpack.io") } + } +} + +rootProject.name = "VoiceCommand" +include(":app")
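
For orientation, here is a minimal end-to-end sketch of how the classes added in this patch are meant to be wired together. This is illustrative caller code, not part of the patch: the function name `runOneShotTranscription` is hypothetical, and `recorder`, `engine`, and the coroutine scope are assumed to be supplied by the host Activity or Service (as the services above do).

```kotlin
import com.jeffemmett.voicecommand.audio.AudioRecorder
import com.jeffemmett.voicecommand.stt.SherpaTranscriptionEngine
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.launch

// Hypothetical helper: stop an in-progress recording, transcribe it, and hand back the text.
fun runOneShotTranscription(
    scope: CoroutineScope,
    recorder: AudioRecorder,
    engine: SherpaTranscriptionEngine,
    onText: (String) -> Unit
) {
    scope.launch {
        // Load a Whisper model on first use (copied from assets or downloaded).
        if (!engine.isReady() && !engine.initialize()) return@launch

        // stopRecording() returns the captured 16 kHz mono samples.
        val samples = recorder.stopRecording()
        if (samples.isNotEmpty()) {
            // transcribe() returns null when decoding fails or produces empty text.
            engine.transcribe(samples)?.let(onText)
        }
        recorder.setIdle()
    }
}
```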