/* Very simple digital dictation assistant */

/* Copyright (c) 2009  Timo Juhani Lindfors <timo.lindfors@iki.fi> */

/* Permission is hereby granted, free of charge, to any person */
/* obtaining a copy of this software and associated documentation */
/* files (the "Software"), to deal in the Software without */
/* restriction, including without limitation the rights to use, */
/* copy, modify, merge, publish, distribute, sublicense, and/or sell */
/* copies of the Software, and to permit persons to whom the */
/* Software is furnished to do so, subject to the following */
/* conditions: */

/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */

/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES */
/* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND */
/* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT */
/* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, */
/* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING */
/* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR */
/* OTHER DEALINGS IN THE SOFTWARE. */
        
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <sys/time.h>
#include <assert.h>
#include <time.h>

/* Sample rate used to size the blocks below (presumably samples per
 * second of the raw input stream -- TODO confirm against the producer). */
#define SAMPLERATE 44100
/* Type of one audio sample as read from stdin and written to stdout. */
typedef int16_t sample;
/* Number of samples in one analysis block: SAMPLERATE / 8. */
#define SAMPLES_PER_RECORDING (SAMPLERATE/8)
/* Number of blocks kept in the ring buffer. */
#define NUM_RECORDINGS 4
/* A block counts as "voice" when its peak-to-peak amplitude (see
 * max_amplitude) is strictly greater than this value. */
#define VOLUME_LIMIT 600

/* Ring-buffer accessor: maps block index i onto a slot of an array named
 * `rec`, which must be in scope at the point of use.  NUM_RECORDINGS is
 * added before the modulo so that indices down to -NUM_RECORDINGS still
 * select a valid slot. */
#define REC(i) rec[((i) + NUM_RECORDINGS) % NUM_RECORDINGS]

/* One block of audio held in the ring buffer, plus the metadata main()
 * attaches to it. */
struct recording {
    /* The SAMPLES_PER_RECORDING samples of this block. */
    int16_t buf[SAMPLES_PER_RECORDING];
    /* Result of detect_voice() for buf: non-zero if the block is loud
     * enough to count as voice, 0 otherwise. */
    int voice;
    /* Wall-clock time from gettimeofday(), taken right after the block
     * was read. */
    struct timeval time;
};

/* Return the peak-to-peak amplitude of one block: the difference between
 * the largest and the smallest of the SAMPLES_PER_RECORDING samples that
 * buf points to.  The result is computed in int, so it cannot wrap even
 * when the samples span the whole int16_t range. */
int max_amplitude(sample *buf) {
    sample *cur;
    sample *end = buf + SAMPLES_PER_RECORDING;
    sample lowest = *buf;
    sample highest = *buf;

    for (cur = buf + 1; cur < end; cur++) {
        if (*cur > highest) {
            highest = *cur;
        }
        if (*cur < lowest) {
            lowest = *cur;
        }
    }
    return highest - lowest;
}

/* Classify one block: returns 1 when its peak-to-peak amplitude is
 * strictly above VOLUME_LIMIT, 0 otherwise. */
int detect_voice(sample *buf) {
    int swing = max_amplitude(buf);

    if (swing > VOLUME_LIMIT) {
        return 1;
    }
    return 0;
}

int main(int argc, char *argv[]) {
    struct recording rec[NUM_RECORDINGS];
    int ret, pos, recording, started_up, verbose;

    recording = 0;
    started_up = 0;
    verbose = 0;
    
    for (pos = 0; ;pos++) {
        ret = fread(REC(pos).buf, sizeof(sample), SAMPLES_PER_RECORDING, stdin);
        assert(ret == SAMPLES_PER_RECORDING);
        
        ret = gettimeofday(&REC(pos).time, NULL);
        assert(ret == 0);

        REC(pos).voice = detect_voice(REC(pos).buf);

        if (pos >= 4 - 1) {
            started_up = 1;
        }

        if (verbose > 0) {
            fprintf(stderr, "%d (%d) %d %d\n", pos, started_up, max_amplitude(REC(pos).buf), REC(pos).voice);
        }
        if (started_up) {
            if (recording) {
                int i, count = 0;
                /* If we are recording but we see 4 blocks of silence
                 * in the future (incl. this block), stop recording
                 * now. */
                for (i = pos - (4 - 1); i <= pos; i++) {
                    count += REC(i).voice;
                }
                if (count == 0) {
                    struct tm *timeinfo;
                    recording = 0;
                    timeinfo = gmtime(&REC(pos - (4 - 1)).time.tv_sec);
                    fprintf(stderr, "%d %lu.%06lu %04d-%02d-%02dT%02d:%02d:%02d+0000 waiting\n",
                            pos,
                            REC(pos - (4 - 1)).time.tv_sec,
                            REC(pos - (4 - 1)).time.tv_usec,
                            timeinfo->tm_year+1900,
                            timeinfo->tm_mon+1,
                            timeinfo->tm_mday,
                            timeinfo->tm_hour,
                            timeinfo->tm_min,
                            timeinfo->tm_sec);
                } else {
                    fwrite(REC(pos - (4 - 1)).buf, sizeof(sample), SAMPLES_PER_RECORDING, stdout);
                }
            } else {
                int i, count = 0;
                
                /* If we are not recording but we see 3 blocks out of
                 * 4 blocks with non-silence, start recording */
                for (i = pos - (4 - 1); i <= pos; i++) {
                    count += REC(i).voice;
                }
                if (count >= 3) {
                    struct tm *timeinfo;
                    recording = 1;
                    timeinfo = gmtime(&REC(pos - (4 - 1)).time.tv_sec);
                    fprintf(stderr, "%d %lu.%06lu %04d-%02d-%02dT%02d:%02d:%02d+0000 recording\n",
                            pos,
                            REC(pos - (4 - 1)).time.tv_sec,
                            REC(pos - (4 - 1)).time.tv_usec,
                            timeinfo->tm_year+1900,
                            timeinfo->tm_mon+1,
                            timeinfo->tm_mday,
                            timeinfo->tm_hour,
                            timeinfo->tm_min,
                            timeinfo->tm_sec);
                    fwrite(REC(pos - (4 - 1)).buf, sizeof(sample), SAMPLES_PER_RECORDING, stdout);
                } else {
                    /* discard sample pos - (4 - 1) */
                }
            }
        }
    }
    
    return 0;
}
