Add voice dictation with voxtype (#4088)

* Try with voxtype

* Update delay to prevent skipped characters

* Add removal of voxtype

* Use -bin package

* Fix for yay

* Nerdicons!

* Use new, subtle nerdfont glyphs instead of standard icons

* Do this in the waybar config instead

* Make voxtype a permanent fixture so it is not lost on waybar resets

* Record purpose

* Add Dictation hotkeys

* Tweak wording and point to config

* Reuse the same help

* Better communication

* Anticipate moving the package to OPR

* Clarify where the packages are coming from

* input group not needed when using hyprland hotkey

* Explain hotkey off

* Allow for changing of the model on right click

* Allow config on right click

* Be more specific

* Focus on config editing with waybar clicks

---------

Co-authored-by: Ryan Hughes <ryan@heyoodle.com>
This commit is contained in:
David Heinemeier Hansson
2026-01-06 13:00:19 +01:00
committed by GitHub
parent e3cd567f6f
commit 0d42f1bafe
12 changed files with 263 additions and 6 deletions

View File

@@ -278,8 +278,8 @@ show_install_ai_menu() {
echo ollama
)
case $(menu "Install" " Dictation [AUR]\n󱚤 Claude Code\n󱚤 Copilot CLI [AUR]\n󱚤 Cursor CLI\n󱚤 Gemini\n󱚤 OpenAI Codex\n󱚤 LM Studio\n󱚤 Ollama\n󱚤 Crush") in
*Dictation*) present_terminal "echo 'Installing Hyprwhspr from AUR...'; yay -S --noconfirm hyprwhspr && hyprwhspr setup" ;;
case $(menu "Install" " Dictation\n󱚤 Claude Code\n󱚤 Copilot CLI [AUR]\n󱚤 Cursor CLI\n󱚤 Gemini\n󱚤 OpenAI Codex\n󱚤 LM Studio\n󱚤 Ollama\n󱚤 Crush") in
*Dictation*) present_terminal omarchy-voxtype-install ;;
*Claude*) install "Claude Code" "claude-code" ;;
*Copilot*) aur_install "Copilot CLI" "github-copilot-cli" ;;
*Cursor*) install "Cursor CLI" "cursor-cli" ;;
@@ -368,11 +368,12 @@ show_install_elixir_menu() {
}
show_remove_menu() {
case $(menu "Remove" "󰣇 Package\n Web App\n TUI\n󰵮 Development\n󰸌 Theme\n󰍲 Windows\n󰈷 Fingerprint\n Fido2") in
case $(menu "Remove" "󰣇 Package\n Web App\n TUI\n󰵮 Development\n Dictation\n󰸌 Theme\n󰍲 Windows\n󰈷 Fingerprint\n Fido2") in
*Package*) terminal omarchy-pkg-remove ;;
*Web*) present_terminal omarchy-webapp-remove ;;
*TUI*) present_terminal omarchy-tui-remove ;;
*Development*) show_remove_development_menu ;;
*Dictation*) present_terminal omarchy-voxtype-remove ;;
*Theme*) present_terminal omarchy-theme-remove ;;
*Windows*) present_terminal "omarchy-windows-vm remove" ;;
*Fingerprint*) present_terminal "omarchy-setup-fingerprint --remove" ;;

17
bin/omarchy-pkg-aur-add Executable file
View File

@@ -0,0 +1,17 @@
#!/bin/bash

# Add the named packages to the system from the AUR when omarchy-pkg-missing
# reports them missing. Exits 1 if the install could not be done, 0 otherwise.

if omarchy-pkg-missing "$@"; then
  if ! yay -S --noconfirm --needed "$@"; then
    exit 1
  fi
fi

# Secondary check: the install step does not always register an error, so
# confirm each package against the local pacman database before reporting
# success.
for pkg in "$@"; do
  pacman -Q "$pkg" &>/dev/null && continue

  echo -e "\033[31mError: Package '$pkg' did not install\033[0m" >&2
  exit 1
done

exit 0

6
bin/omarchy-voxtype-config Executable file
View File

@@ -0,0 +1,6 @@
#!/bin/bash

set -e

# Right-click handler for the Voxtype waybar module: hands the Voxtype config
# file to omarchy-launch-editor, replacing this shell process via exec.
voxtype_config=~/.config/voxtype/config.toml

exec omarchy-launch-editor "$voxtype_config"

19
bin/omarchy-voxtype-install Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
set -e
# Install voxtype and configure it for use.
if gum confirm "Install Voxtype + AI model (~400MB) to enable dictation?"; then
omarchy-pkg-add wtype
omarchy-pkg-aur-add voxtype-bin
# Setup voxtype: seed the user's config directory with the Omarchy default config.
# The source path is quoted so an OMARCHY_PATH containing spaces or glob
# characters is passed to cp as a single, literal argument.
mkdir -p ~/.config/voxtype
cp "$OMARCHY_PATH/default/voxtype/config.toml" ~/.config/voxtype/
voxtype setup --download
voxtype setup systemd
omarchy-restart-waybar
notify-send " Voxtype Dictation Ready" "Hold Super + Ctrl + X to dictate.\nEdit ~/.config/voxtype/config.toml for options." -t 10000
fi

5
bin/omarchy-voxtype-model Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/bash
set -e
# Used by the Voxtype waybar module on left click (its "on-click" action).
# Runs `voxtype setup model` through omarchy-launch-floating-terminal-with-presentation,
# then restarts waybar.
# NOTE(review): the restart presumably relies on the launcher blocking until the
# terminal session ends — confirm against the launcher's behavior.
omarchy-launch-floating-terminal-with-presentation "voxtype setup model"
omarchy-restart-waybar

20
bin/omarchy-voxtype-remove Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash

set -e

# Remove voxtype and its configurations.
#
# Only acts when omarchy-cmd-present finds voxtype; otherwise it just reports
# that Voxtype was not installed.

if omarchy-cmd-present voxtype; then
  echo "Uninstall Voxtype to remove dictation."

  # Remove services. `disable --now` stops the unit and also removes any
  # enablement symlinks; a plain stop followed by deleting the unit file would
  # leave those symlinks dangling. This is best effort: errors are silenced so
  # a missing or never-enabled unit does not abort the script under set -e.
  systemctl --user disable --now voxtype.service 2>/dev/null || true
  rm -f ~/.config/systemd/user/voxtype*
  systemctl --user daemon-reload

  # Remove packages and configs
  omarchy-pkg-drop wtype voxtype-bin
  rm -rf ~/.config/voxtype
  rm -rf ~/.local/share/voxtype
else
  echo "Voxtype was not installed."
fi

9
bin/omarchy-voxtype-status Executable file
View File

@@ -0,0 +1,9 @@
#!/bin/bash

# Status feed for the Voxtype waybar module ("exec" of custom/voxtype).
# When voxtype is present, follows `voxtype status` and re-emits every JSON
# line with an extra "alt" field; otherwise prints one empty status object.

# Copy the "class" field of the JSON object on stdin into "alt" and print the
# result compactly (presumably so waybar can pick a format-icon by state).
with_alt_from_class() {
  jq -c '. + {alt: .class}'
}

if ! omarchy-cmd-present voxtype; then
  echo '{"alt": "", "tooltip": ""}'
else
  voxtype status --follow --extended --format json | while read -r line; do
    echo "$line" | with_alt_from_class
  done
fi

View File

@@ -5,7 +5,7 @@
"spacing": 0,
"height": 26,
"modules-left": ["custom/omarchy", "hyprland/workspaces"],
"modules-center": ["clock", "custom/update", "custom/screenrecording-indicator"],
"modules-center": ["clock", "custom/update", "custom/voxtype", "custom/screenrecording-indicator"],
"modules-right": [
"group/tray-expander",
"bluetooth",
@@ -140,6 +140,19 @@
"signal": 8,
"return-type": "json"
},
"custom/voxtype": {
"exec": "omarchy-voxtype-status",
"return-type": "json",
"format": "{icon}",
"format-icons": {
"idle": "",
"recording": "󰍬",
"transcribing": "󰔟"
},
"tooltip": true,
"on-click-right": "omarchy-voxtype-config",
"on-click": "omarchy-voxtype-model"
},
"tray": {
"icon-size": 12,
"spacing": 17

View File

@@ -74,10 +74,16 @@ tooltip {
#custom-screenrecording-indicator {
min-width: 12px;
margin-left: 8.75px;
margin-left: 5px;
font-size: 10px;
padding-bottom: 1px;
}
#custom-screenrecording-indicator.active {
color: #a55555;
}
#custom-voxtype {
min-width: 12px;
margin: 0 0 0 7.5px;
}

View File

@@ -49,4 +49,8 @@ bindd = SUPER CTRL ALT, B, Show battery remaining, exec, notify-send "󰁹 Ba
bindd = SUPER CTRL, A, Audio controls, exec, omarchy-launch-audio
bindd = SUPER CTRL, B, Bluetooth controls, exec, omarchy-launch-bluetooth
bindd = SUPER CTRL, W, Wifi controls, exec, omarchy-launch-wifi
bindd = SUPER CTRL, T, Activity, exec, omarchy-launch-tui btop
bindd = SUPER CTRL, T, Activity, exec, omarchy-launch-tui btop
# Dictation
bindd = SUPER CTRL, X, Start dictation, exec, voxtype record start
binddr = SUPER CTRL, X, Stop dictation, exec, voxtype record stop

View File

@@ -0,0 +1,97 @@
# Voxtype Configuration
#
# Location: ~/.config/voxtype/config.toml
# All settings can be overridden via CLI flags
#
# State file for external integrations (Waybar, polybar, etc.)
# Use "auto" for default location ($XDG_RUNTIME_DIR/voxtype/state),
# a custom path, or "disabled" to turn off. The daemon writes state
# ("idle", "recording", "transcribing") to this file whenever it changes.
# Required for `voxtype record toggle` and `voxtype status` commands.
state_file = "auto"
[hotkey]
# Hotkey is configured in Hyprland. Default is Super + Ctrl + X
enabled = false
[audio]
# Audio input device ("default" uses system default)
# List devices with: pactl list sources short
device = "default"
# Sample rate in Hz (whisper expects 16000)
sample_rate = 16000
# Maximum recording duration in seconds (safety limit)
max_duration_secs = 60
# [audio.feedback]
# Enable audio feedback sounds (beeps when recording starts/stops)
# enabled = true
#
# Sound theme: "default", "subtle", "mechanical", or path to custom theme directory
# theme = "default"
#
# Volume level (0.0 to 1.0)
# volume = 0.7
[whisper]
# Model to use for transcription
# Options: tiny, tiny.en, base, base.en, small, small.en, medium, medium.en, large-v3, large-v3-turbo
# .en models are English-only but faster and more accurate for English
# large-v3-turbo is faster than large-v3 with minimal accuracy loss (recommended for GPU)
# Or provide absolute path to a custom .bin model file
model = "base.en"
# Language for transcription
# Use "en" for English, "auto" for auto-detection
# See: https://github.com/openai/whisper#available-models-and-languages
language = "en"
# Translate non-English speech to English
translate = false
# Number of CPU threads for inference (omit for auto-detection)
# threads = 4
[output]
# Primary output mode: "type" or "clipboard"
# - type: Simulates keyboard input at cursor position (requires wtype or ydotool)
# - clipboard: Copies text to clipboard (requires wl-copy)
mode = "type"
# Fall back to clipboard if typing fails
fallback_to_clipboard = true
# Delay between typed characters in milliseconds
# 0 = fastest possible, increase if characters are dropped
type_delay_ms = 1
# Post-processing command (optional)
# Pipe transcribed text through an external command for cleanup before output.
# The command receives text on stdin and outputs processed text on stdout.
# Useful for LLM-based text cleanup, grammar correction, filler word removal.
# On any failure (timeout, error), falls back to original transcription.
#
# [output.post_process]
# command = "ollama run llama3.2:1b 'Clean up this dictation. Fix grammar, remove filler words. Output only the cleaned text:'"
# timeout_ms = 30000 # 30 second timeout (generous for LLM)
[output.notification]
# Show notification when recording starts (hotkey pressed)
on_recording_start = false
# Show notification when recording stops (transcription beginning)
on_recording_stop = false
# Show notification with transcribed text after transcription completes
on_transcription = false
# [text]
# Text processing options (word replacements, spoken punctuation)
#
# Enable spoken punctuation conversion (e.g., say "period" to get ".")
# spoken_punctuation = false
#
# Custom word replacements (case-insensitive)
# replacements = { "hyperwhisper" = "hyprwhspr" }

60
migrations/1767685679.sh Normal file
View File

@@ -0,0 +1,60 @@
# Migration: patch the user's waybar style.css and config.jsonc to add the
# custom/voxtype module, then restart waybar so the change takes effect.
# patch is run with -N so a patch that appears to be already applied is skipped.
echo "Add voxtype to waybar"
patch -N ~/.config/waybar/style.css << 'EOF'
--- a/waybar/style.css
+++ b/waybar/style.css
@@ -74,10 +74,16 @@ tooltip {
#custom-screenrecording-indicator {
min-width: 12px;
- margin-left: 8.75px;
+ margin-left: 5px;
font-size: 10px;
+ padding-bottom: 1px;
}
#custom-screenrecording-indicator.active {
color: #a55555;
}
+
+#custom-voxtype {
+ min-width: 12px;
+ margin: 0 0 0 7.5px;
+}
EOF
patch -N ~/.config/waybar/config.jsonc << 'EOF'
--- a/waybar/config.jsonc
+++ b/waybar/config.jsonc
@@ -5,7 +5,7 @@
"spacing": 0,
"height": 26,
"modules-left": ["custom/omarchy", "hyprland/workspaces"],
- "modules-center": ["clock", "custom/update", "custom/screenrecording-indicator"],
+ "modules-center": ["clock", "custom/update", "custom/voxtype", "custom/screenrecording-indicator"],
"modules-right": [
"group/tray-expander",
"bluetooth",
@@ -140,6 +140,19 @@
"signal": 8,
"return-type": "json"
},
+ "custom/voxtype": {
+ "exec": "omarchy-voxtype-status",
+ "return-type": "json",
+ "format": "{icon}",
+ "format-icons": {
+ "idle": "",
+ "recording": "󰍬",
+ "transcribing": "󰔟"
+ },
+ "tooltip": true,
+ "on-click-right": "omarchy-voxtype-config",
+ "on-click": "omarchy-voxtype-model"
+ },
"tray": {
"icon-size": 12,
"spacing": 17
EOF
omarchy-restart-waybar