52 #if defined(_WIN32) && !defined(__CYGWIN__) 55 #include <sys/select.h> 60 #include <sphinxbase/fe.h> 65 #define BLOCKSIZE 1024 67 static const arg_t cont_args_def[] = {
68 waveform_to_cepstral_command_line_macro(),
73 "Argument file giving extra arguments."},
77 "Name of audio device to use for input."},
81 "Transcribe audio from microphone."},
85 "Name of audio file to use for input."},
89 "Write a single cleaned file."},
95 static int (*read_audio) (int16 * buf,
int len);
97 static const char *infile_path;
99 static int32 singlefile;
105 #if (defined(_WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE) 112 tmo.tv_usec = ms * 1000;
114 select(0, NULL, NULL, NULL, &tmo);
119 read_audio_file(int16 * buf,
int len)
122 E_FATAL(
"Failed to read audio from file\n");
125 return fread(buf,
sizeof(int16), len, infile);
129 read_audio_adev(int16 * buf,
int len)
134 E_FATAL(
"Failed to read audio from mic\n");
137 while ((k = ad_read(ad, buf, len)) == 0)
148 int16 pcm_buf[BLOCKSIZE];
150 int16 *voiced_buf = NULL;
151 int32 voiced_nsamps, out_frameidx, uttstart = 0;
152 char file_name[1024];
153 uint8 cur_vad_state, vad_state, writing;
154 int uttno, uttlen, sample_rate;
155 int32 nframes, nframes_tmp;
156 int16 frame_size, frame_shift, frame_rate;
159 sample_rate = (int) cmd_ln_float32_r(config,
"-samprate");
160 frame_rate = cmd_ln_int32_r(config,
"-frate");
162 (int32) (cmd_ln_float32_r(config,
"-wlen") * sample_rate + 0.5);
164 (int32) (sample_rate / cmd_ln_int32_r(config,
"-frate") + 0.5);
165 nframes = (BLOCKSIZE - frame_size) / frame_shift;
178 while ((k = read_audio(pcm_buf, BLOCKSIZE)) > 0) {
179 int16
const *pcm_buf_tmp;
180 pcm_buf_tmp = &pcm_buf[0];
182 nframes_tmp = nframes;
183 fe_process_frames_ext(fe, &pcm_buf_tmp, &k, cep_buf,
184 &nframes_tmp, voiced_buf,
185 &voiced_nsamps, &out_frameidx);
186 if (out_frameidx > 0) {
187 uttstart = out_frameidx;
189 vad_state = fe_get_vad_state(fe);
190 if (!cur_vad_state && vad_state) {
194 sprintf(file_name,
"%s%04d.raw", infile_path, uttno);
195 if ((file = fopen(file_name,
"wb")) == NULL)
199 sprintf(file_name,
"%s.raw", infile_path);
200 if ((file = fopen(file_name,
"ab")) == NULL)
207 if (writing && file && voiced_nsamps > 0) {
208 fwrite(voiced_buf,
sizeof(int16), voiced_nsamps, file);
209 uttlen += voiced_nsamps;
212 if (cur_vad_state && !vad_state) {
215 printf(
"Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
218 ((
double) uttstart) / frame_rate,
220 ((
double) uttlen) / sample_rate);
222 fe_end_utt(fe, cep_buf[0], &nframes_tmp);
228 cur_vad_state = vad_state;
234 printf(
"Utterance %04d: file %s start %.1f sec length %d samples ( %.2f sec )\n",
237 ((
double) uttstart) / frame_rate,
239 ((
double) uttlen) / sample_rate);
242 fe_end_utt(fe, cep_buf[0], &nframes);
247 main(
int argc,
char *argv[])
259 E_INFO(
"Specify '-infile <file.wav>' to segment a file or '-inmic yes' to segment audio from microphone.\n");
266 if ((infile_path =
cmd_ln_str_r(config,
"-infile")) != NULL) {
267 if ((infile = fopen(infile_path,
"rb")) == NULL) {
271 read_audio = &read_audio_file;
277 (
int) cmd_ln_float32_r(config,
280 E_FATAL(
"Failed to open audio device\n");
283 read_audio = &read_audio_adev;
284 printf(
"Start recording ...\n");
286 if (ad_start_rec(ad) < 0)
287 E_FATAL(
"Failed to start recording\n");
290 for (i = 0; i < 5; i++) {
292 read_audio(buf, 2048);
294 printf(
"You may speak now\n");
298 fe = fe_init_auto_r(config);
Command-line and other configuration parsing and handling.
#define E_INFO(...)
Print logging information to standard error stream.
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Sphinx's memory allocation/deallocation routines.
Audio recording structure.
SPHINXBASE_EXPORT int cmd_ln_free_r(cmd_ln_t *cmdln)
Release a command-line argument set and all associated strings.
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_parse_r(cmd_ln_t *inout_cmdln, arg_t const *defn, int32 argc, char *argv[], int32 strict)
Parse a list of strings into arguments.
#define ARG_STRING
String argument (optional).
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT char const * cmd_ln_str_r(cmd_ln_t *cmdln, char const *name)
Retrieve a string from a command-line object.
Implementation of logging routines.
#define E_FATAL_SYSTEM(...)
Print error text, call perror(""), then exit(errno).
#define ARG_BOOLEAN
Boolean (true/false) argument (optional).
SPHINXBASE_EXPORT cmd_ln_t * cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, arg_t const *defn, char const *filename, int32 strict)
Parse an arguments file by delimiting on " \r\t\n" and putting each token into an argv[] for cmd_l...
generic live audio interface for recording and playback
Argument definition structure.
Opaque structure used to hold the results of command-line parsing.
SPHINXBASE_EXPORT void ckd_free_2d(void *ptr)
Free a 2-D array (ptr) previously allocated by ckd_calloc_2d.
#define cmd_ln_boolean_r(c, n)
Retrieve a boolean value from a command-line object.
#define E_FATAL(...)
Exit with non-zero status after error message.
Structure for the front-end computation.
SPHINXBASE_EXPORT ad_rec_t * ad_open_dev(const char *dev, int32 samples_per_sec)
Open a specific audio device for recording.