voice key detection
Hello all,
I receive quite a few requests for the script for setting up a voice key (voice response trigger). Below you can find the complete script I came up with, mixing a bit of everything I found here and there.
https://forum.cogsci.nl/discussion/5965/voice-key-recording
https://forum.cogsci.nl/discussion/8728/voice-key-and-recording
Several interesting functionalities :
- Online voice key detection (without interruption of sound recording) with a very simple algorithm - basically, when a loudness threshold is reached for a certain number of consecutive frames, you trigger something.
- Recording of a .wav file of the vocal response
- Implementation of a more "complex" voice onset algorithm relying on the whole vocal response to define a more accurate threshold
- Recording of a .csv file of the loudness data, timestamps and the identified voice onset
- Plotting of the loudness data with the identified voice onset threshold
To make it work, I use 4 different inline components :
Create participant's folder
# Place this at the top of the experimental sequence to create a
# participant folder in which all the data will be saved.
import os

# Creates a folder for each subject; all files for this subject are saved
# there later on. experiment_path and subject_nr are injected by OpenSesame.
newpath = os.path.join(experiment_path, str(subject_nr))
# exist_ok=True makes this idempotent (no crash if the folder already
# exists, e.g. when the experiment is restarted for the same subject)
# and avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(newpath, exist_ok=True)
Setup of the microphone
# Place this BEFORE the component for which you want to record the sound.
import pyaudio
import struct
import math
import wave
import pandas as pd
import numpy as np

# Create trial count variable - replace "practice_sequence" by the name of
# your trial sequence (the count_* variable is provided by OpenSesame).
trial_count = count_pratice_sequence + 1

# Number of standard deviations above the mean (e.g. 1.5 x SD) taken as the
# threshold for voice onset detection - CHANGE IF MICROPHONE WAS NOT CLOSE
# ENOUGH OR DETECTION SETTING WAS NOT HIGH ENOUGH.
nSD = 1
# Percentage of the peak signal in a file (e.g. 0.01 = 1 % of peak signal)
# taken as the minimal threshold value allowed.
minTH = 0.005
# Maximum response time - recording duration (in milliseconds).
timeout = 3000

# Parameters of the PyAudio object.
FORMAT = pyaudio.paInt16
SHORT_NORMALIZE = (1.0 / 32768.0)  # scales signed 16-bit samples to [-1, 1]
CHANNELS = 2
RATE = 44100
INPUT_BLOCK_TIME = 0.01
INPUT_FRAMES_PER_BLOCK = int(RATE * INPUT_BLOCK_TIME)

# Path where sound files will be saved.
FILENAME = experiment_path + "\\" + str(subject_nr) + "\\" + "p_" + str(trial_count) + ".wav"
CHUNCK = 1024

p = pyaudio.PyAudio()


def get_rms(block):
    """Get root mean square of a block of 16-bit samples as a measure of loudness.

    `block` is the raw bytes returned by stream.read(); each sample is two
    bytes ('h' in struct format), so the sample count is len(block) // 2.
    """
    # Integer division: len(block) / 2 would be a float in Python 3, which is
    # the wrong type for a struct repeat count.
    count = len(block) // 2
    format = "%dh" % (count)
    shorts = struct.unpack(format, block)
    sum_squares = 0.0
    for sample in shorts:
        n = sample * SHORT_NORMALIZE
        sum_squares += n * n
    return math.sqrt(sum_squares / count)


# Opens the microphone.
stream = p.open(
    format=FORMAT,
    channels=CHANNELS,
    rate=RATE,
    input=True,
    input_device_index=0,
    frames_per_buffer=INPUT_FRAMES_PER_BLOCK,
)
Sound recording and online feedback
# Place this AFTER the component for which you want to record the sound.
frames = []     # raw audio blocks, joined later into the .wav file
list_ldn = []   # loudness (RMS) per block
list_ct = []    # clock.time() timestamp per block
# Listen for sounds until a timeout occurs.
start_time = clock.time()
print(f"Trial n° {trial_count} starts recording")
flag_triggered = False   # becomes True after the first online detection
loudness_counter = 0     # consecutive blocks above the online threshold
while clock.time() - start_time <= timeout:
    try:
        block = stream.read(CHUNCK)
        frames.append(block)
    except IOError as e:
        print(e)
        # Skip the rest of this iteration: 'block' would be stale here (or
        # undefined on the very first pass), so it must not be analysed or
        # logged as if it were fresh data.
        continue
    loudness = get_rms(block)
    if loudness > 0.025:
        loudness_counter += 1
    else:
        loudness_counter = 0
    # Checks when the threshold of 0.025 has been reached for 2 consecutive
    # frames for the first time - and ensures that no vocal response is given
    # before 100 ms. clock.time() is in milliseconds (see timeout = 3000 and
    # the "ms" print below), so the guard compares against 100, not 0.1.
    if loudness_counter >= 2 and not flag_triggered and (clock.time() - start_time) > 100:
        # When the threshold is reached - presentation of the
        # 'response_detection_feedback' sketchpad.
        my_canvas = items[u'response_detection_feedback'].canvas
        my_canvas.text('REPONSE DETECTEE', center=True)
        my_canvas.show()
        flag_triggered = True  # set the flag to True after the first trigger
    list_ldn.append(loudness)
    list_ct.append(clock.time())
end_time = clock.time()
# Close the audio stream.
stream.stop_stream()
stream.close()
p.terminate()
print("%f ms total duration" % (end_time - start_time))
print(f"Trial n° {trial_count} stops recording", end="\n\n")
# Saves the sound file.
wf = wave.open(FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
Data saving and threshold detection
# Place this at the very end of the trial_sequence - preferably just after a
# blank screen to buffer any potential lags.
import pandas as pd
import numpy as np
import plotly.express as px
# Creates a dataframe with clock time, loudness, and trial-relative time (ms).
df = pd.DataFrame({'clock_time': list_ct, 'loudness': list_ldn})
df['trial_time'] = df['clock_time'] - start_time
# Calculate the mean and standard deviation of the 'loudness' column.
loudness_mean = np.mean(df['loudness'])
loudness_std = np.std(df['loudness'])
# Calculate the threshold based on nSD standard deviations above the mean.
threshold_nSD = loudness_mean + loudness_std * nSD
# Calculate the peak loudness.
peak_loudness = np.max(df['loudness'])
# Calculate the threshold based on minTH percentage of peak loudness.
threshold_minTH = minTH * peak_loudness
# Determine the sound threshold as the higher of the two calculated thresholds.
sound_threshold = max(threshold_nSD, threshold_minTH)
# These are logged whether or not a vocal response was detected.
var.trial_start = start_time
var.trial_end = end_time
# Compute a response time only if the recording lasted more than 100 ms.
# trial_time is in milliseconds (clock.time() units), so the comparison is
# against 100, not 0.1.
if df['trial_time'].iloc[-1] > 100:
    try:
        # First sample whose loudness exceeds the threshold = voice onset.
        response_time = df.loc[df['loudness'] > sound_threshold, 'trial_time'].iloc[0]
        response = 1
        var.loudness = df['loudness'].tolist()
    except IndexError:
        # No sample ever crossed the threshold: no vocal response.
        response_time = None
        response = 0
        var.loudness = None
else:
    response_time = None
    response = 0
    var.loudness = None
if response == 1:
    print("Vocal key detected")
else:
    print("Vocal key not detected")
# If a keyboard response is also used, this will append the suffix 'voice_key'
# to the OpenSesame variables response and response_time.
responses.add(response=response, response_time=response_time, item='voice_key')
# Lists the variables to include in the data file - DO NOT use a logger if you
# use this.
INCLUDE = ['subject_nr', 'trial_count', 'practice', 'response_time', 'correct', 'response', 'trial_start', 'trial_end', 'loudness', 'target', 'sound_threshold', 'id_acteur', 'genre_acteur', 'age_acteur', 'emotion_face', 'emotion_body', 'congruence', 'condition', 'image', 'x_coordination', 'direction', 'correct_response']
log.write_vars([log_var for log_var in var.inspect() if log_var in INCLUDE])
# Add new variables to the DataFrame.
df['subject_nr'] = subject_nr
df['trial_count'] = trial_count
df['response_threshold'] = (df['loudness'] > sound_threshold).astype(int)
# Save the DataFrame to a CSV file.
csv_filename = f"{experiment_path}\\{subject_nr}\\p_{trial_count}.csv"
df.to_csv(csv_filename, index=False)
# Optional: Create and save an interactive plot - comment out if unnecessary.
fig = px.line(df, x="trial_time", y="loudness", markers=True)
fig.update_yaxes(range=[0, 0.05])
if response_time is not None:
    fig.add_vline(x=response_time)
    fig.update_layout(
        title_text=f"The response time was {response_time}"
    )
else:
    fig.update_layout(
        title_text="No response was detected")
fig.write_html(experiment_path + "\\" + str(subject_nr) + "\\" + str(trial_count) + ".html")
It is not perfect but it does the job - I hope this helps, and I would love to get feedback if you notice anything.
Comments
Thank you! 🤗 Really great to see that you're sharing this.
Check out SigmundAI.eu for our OpenSesame AI assistant!