mirror of
https://github.com/basecamp/omarchy.git
synced 2026-02-17 15:25:37 +00:00
Add voice dictation with voxtype (#4088)
* Try with voxtype * Update delay to prevent skipped characters * Add removal of voxtype * Use -bin package * Fix for yay * Nerdicons! * Use new, subtle nerdfont glyphs instead of standard icons * Do this in the waybar config instead * Make voxtype a permanent fixture so it is not lost on waybar resets * Record purpose * Add Dictation hotkeys * Tweak wording and point to config * Reuse the same help * Better communication * Anticipate moving the package to OPR * Clarify where the packages are coming from * input group not needed when using hyprland hotkey * Explain hotkey off * Allow for changing of the model on right click * Allow config on right click * Be more specific * Focus on config editing with waybar clicks --------- Co-authored-by: Ryan Hughes <ryan@heyoodle.com>
This commit is contained in:
committed by
GitHub
parent
e3cd567f6f
commit
0d42f1bafe
@@ -49,4 +49,8 @@ bindd = SUPER CTRL ALT, B, Show battery remaining, exec, notify-send " Ba
|
||||
bindd = SUPER CTRL, A, Audio controls, exec, omarchy-launch-audio
|
||||
bindd = SUPER CTRL, B, Bluetooth controls, exec, omarchy-launch-bluetooth
|
||||
bindd = SUPER CTRL, W, Wifi controls, exec, omarchy-launch-wifi
|
||||
bindd = SUPER CTRL, T, Activity, exec, omarchy-launch-tui btop
|
||||
bindd = SUPER CTRL, T, Activity, exec, omarchy-launch-tui btop
|
||||
|
||||
# Dictation
|
||||
bindd = SUPER CTRL, X, Start dictation, exec, voxtype record start
|
||||
binddr = SUPER CTRL, X, Stop dictation, exec, voxtype record stop
|
||||
|
||||
97
default/voxtype/config.toml
Normal file
97
default/voxtype/config.toml
Normal file
@@ -0,0 +1,97 @@
|
||||
# Voxtype Configuration
|
||||
#
|
||||
# Location: ~/.config/voxtype/config.toml
|
||||
# All settings can be overridden via CLI flags
|
||||
#
|
||||
# State file for external integrations (Waybar, polybar, etc.)
|
||||
# Use "auto" for default location ($XDG_RUNTIME_DIR/voxtype/state),
|
||||
# a custom path, or "disabled" to turn off. The daemon writes state
|
||||
# ("idle", "recording", "transcribing") to this file whenever it changes.
|
||||
# Required for `voxtype record toggle` and `voxtype status` commands.
|
||||
state_file = "auto"
|
||||
|
||||
[hotkey]
|
||||
# Disabled here because the dictation hotkey is bound in Hyprland instead (default: Super + Ctrl + X)
|
||||
enabled = false
|
||||
|
||||
[audio]
|
||||
# Audio input device ("default" uses system default)
|
||||
# List devices with: pactl list sources short
|
||||
device = "default"
|
||||
|
||||
# Sample rate in Hz (whisper expects 16000)
|
||||
sample_rate = 16000
|
||||
|
||||
# Maximum recording duration in seconds (safety limit)
|
||||
max_duration_secs = 60
|
||||
|
||||
# [audio.feedback]
|
||||
# Enable audio feedback sounds (beeps when recording starts/stops)
|
||||
# enabled = true
|
||||
#
|
||||
# Sound theme: "default", "subtle", "mechanical", or path to custom theme directory
|
||||
# theme = "default"
|
||||
#
|
||||
# Volume level (0.0 to 1.0)
|
||||
# volume = 0.7
|
||||
|
||||
[whisper]
|
||||
# Model to use for transcription
|
||||
# Options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v3, large-v3-turbo
|
||||
# .en models are English-only but faster and more accurate for English
|
||||
# large-v3-turbo is faster than large-v3 with minimal accuracy loss (recommended for GPU)
|
||||
# Or provide an absolute path to a custom .bin model file
|
||||
model = "base.en"
|
||||
|
||||
# Language for transcription
|
||||
# Use "en" for English, "auto" for auto-detection
|
||||
# See: https://github.com/openai/whisper#available-models-and-languages
|
||||
language = "en"
|
||||
|
||||
# Translate non-English speech to English
|
||||
translate = false
|
||||
|
||||
# Number of CPU threads for inference (omit for auto-detection)
|
||||
# threads = 4
|
||||
|
||||
[output]
|
||||
# Primary output mode: "type" or "clipboard"
|
||||
# - type: Simulates keyboard input at cursor position (requires ydotool)
|
||||
# - clipboard: Copies text to clipboard (requires wl-copy)
|
||||
mode = "type"
|
||||
|
||||
# Fall back to clipboard if typing fails
|
||||
fallback_to_clipboard = true
|
||||
|
||||
# Delay between typed characters in milliseconds
|
||||
# 0 = fastest possible; increase if characters are dropped
|
||||
type_delay_ms = 1
|
||||
|
||||
# Post-processing command (optional)
|
||||
# Pipe transcribed text through an external command for cleanup before output.
|
||||
# The command receives text on stdin and outputs processed text on stdout.
|
||||
# Useful for LLM-based text cleanup, grammar correction, filler word removal.
|
||||
# On any failure (timeout, error), falls back to the original transcription.
|
||||
#
|
||||
# [output.post_process]
|
||||
# command = "ollama run llama3.2:1b 'Clean up this dictation. Fix grammar, remove filler words. Output only the cleaned text:'"
|
||||
# timeout_ms = 30000 # 30 second timeout (generous for LLM)
|
||||
|
||||
[output.notification]
|
||||
# Show notification when recording starts (hotkey pressed)
|
||||
on_recording_start = false
|
||||
|
||||
# Show notification when recording stops (transcription beginning)
|
||||
on_recording_stop = false
|
||||
|
||||
# Show notification with transcribed text after transcription completes
|
||||
on_transcription = false
|
||||
|
||||
# [text]
|
||||
# Text processing options (word replacements, spoken punctuation)
|
||||
#
|
||||
# Enable spoken punctuation conversion (e.g., say "period" to get ".")
|
||||
# spoken_punctuation = false
|
||||
#
|
||||
# Custom word replacements (case-insensitive)
|
||||
# replacements = { "hyperwhisper" = "hyprwhspr" }
|
||||
Reference in New Issue
Block a user