DirectSoundでマイクからの音声入力をキャプチャして保存する
だけなのに、いいサンプルが少なく苦戦した・・・。
MSオフィシャルのドキュメントも見づらい・・・・ので、
ソースを置いて自分用のメモとする。
/*
 * Minimal DirectSound capture sample: records a few seconds of PCM audio from
 * the default capture device and saves it as "test1.wav".
 *
 * Written as C++ (COM interfaces are called through obj->Method()).
 */
#include <windows.h>
#include <dsound.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define SAMPLERATE    44100 // Hz
#define CHANNELS      2     // 1: mono, 2: stereo
#define BITSPERSAMPLE 16    // bits per sample

#pragma comment(lib, "dsound.lib")
#pragma comment(lib, "dxguid.lib")

/* RIFF container header: "RIFF" <size of everything that follows> "WAVE". */
typedef struct {
    char riffID[4];
    unsigned long fileSize;
    char waveID[4];
} RIFFHeader;

/* "fmt " chunk describing plain PCM data. */
typedef struct {
    char chunkID[4];
    long chunkSize;
    short wFormatTag;
    unsigned short wChannels;
    unsigned long dwSamplesPerSec;
    unsigned long dwAvgBytesPerSec;
    unsigned short wBlockAlign;
    unsigned short wBitsPerSample;
    /* Note: there may be additional fields here, depending upon wFormatTag. */
} FormatChunk;

/* "data" chunk header; the raw samples follow it in the file. */
typedef struct {
    char chunkID[4];
    long chunkSize;
} DataChunk;

/*
 * Device enumeration callback; prints the driver name and description of every
 * enumerated device except the entry whose GUID is NULL.
 * NOTE(review): printf("%s") expects narrow strings, so this is only correct
 * in a non-UNICODE build where LPCTSTR is a char pointer.
 * Always returns TRUE.
 */
BOOL CALLBACK DSEnumProc(LPGUID lpGUID,
                         LPCTSTR lpszDesc,
                         LPCTSTR lpszDrvName,
                         LPVOID lpContext)
{
    (void)lpContext;
    if (lpGUID != NULL) { // NULL only for "Primary Sound Driver".
        printf("Driver name: %s, Description:%s\n", lpszDrvName, lpszDesc);
    }
    return TRUE;
}

/*
 * Writes `size` bytes to `hFile` and adds the number of bytes actually written
 * to *total. Returns TRUE only if the whole range was written.
 */
static BOOL writeAll(HANDLE hFile, const void* data, DWORD size, DWORD* total)
{
    DWORD written = 0;
    if (!WriteFile(hFile, data, size, &written, NULL)) {
        return FALSE;
    }
    *total += written;
    return written == size;
}

/*
 * Saves `copiedLength` bytes of PCM samples (format given by SAMPLERATE,
 * CHANNELS and BITSPERSAMPLE) as "test1.wav" in the current directory.
 *
 * Returns 0 on success, -1 if the arguments are invalid, the file cannot be
 * created, or any write fails.
 */
int writeWaveFile(char* copiedBuffer, long copiedLength)
{
    if (copiedBuffer == NULL || copiedLength < 0) {
        return -1;
    }

    RIFFHeader riffHeader = {{'R','I','F','F'}, 0, {'W','A','V','E'}};
    // RIFF size = "WAVE" tag (4) + fmt chunk + data chunk header + samples.
    riffHeader.fileSize =
        (unsigned long)(4 + sizeof(FormatChunk) + sizeof(DataChunk))
        + (unsigned long)copiedLength;

    FormatChunk formatChunk = {{'f','m','t',' '},
                               16, // size of the PCM format fields after chunkSize
                               1,  // format tag 1 = PCM
                               CHANNELS,
                               SAMPLERATE,
                               SAMPLERATE * CHANNELS * BITSPERSAMPLE / 8,
                               CHANNELS * BITSPERSAMPLE / 8,
                               BITSPERSAMPLE};
    DataChunk dataChunk = {{'d','a','t','a'}, copiedLength};

    // File output. TEXT() keeps the call valid in both ANSI and UNICODE builds.
    HANDLE hFile = CreateFile(TEXT("test1.wav"), GENERIC_WRITE, 0, NULL,
                              CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        MessageBox(NULL, TEXT("ファイルが開けません"), NULL, MB_OK);
        return -1;
    }

    DWORD totalWritten = 0;
    BOOL ok = writeAll(hFile, &riffHeader, (DWORD)sizeof(RIFFHeader), &totalWritten)
           && writeAll(hFile, &formatChunk, (DWORD)sizeof(FormatChunk), &totalWritten)
           && writeAll(hFile, &dataChunk, (DWORD)sizeof(DataChunk), &totalWritten)
           && writeAll(hFile, copiedBuffer, (DWORD)copiedLength, &totalWritten);

    CloseHandle(hFile);

    if (!ok) {
        printf("Write error after %lu bytes.\n", (unsigned long)totalWritten);
        return -1;
    }
    printf("%lu bytes written.\n", (unsigned long)totalWritten);
    return 0;
}

/*
 * Appends up to `length` bytes from `src` to `dest` at offset `used`, never
 * writing past `capacity`. Returns the new number of bytes used.
 */
static DWORD appendCaptured(char* dest, DWORD used, DWORD capacity,
                            const void* src, DWORD length)
{
    if (src == NULL || length == 0 || used >= capacity) {
        return used;
    }
    if (length > capacity - used) {
        length = capacity - used;
    }
    memcpy(dest + used, src, length);
    return used + length;
}

/*
 * Records from the default capture device and writes the result with
 * writeWaveFile(). Returns 0 on success, 1 on any failure.
 */
//int _tmain(int argc, char* argv[])
int main(int argc, char* argv[])
{
    LPDIRECTSOUNDCAPTURE captureDevice = NULL;        // DirectSound capture device object
    LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = NULL;  // DirectSound capture buffer object

    // Plain PCM wave format:
    //   wFormatTag      wave format
    //   nChannels       1 = mono, 2 = stereo
    //   nSamplesPerSec  samples per second
    //   nAvgBytesPerSec bytes per second (nSamplesPerSec * nBlockAlign)
    //   nBlockAlign     bytes per sample frame (nChannels * wBitsPerSample / 8)
    //   wBitsPerSample  bits per sample (8 or 16)
    //   cbSize          always 0
    WAVEFORMATEX wfx = {WAVE_FORMAT_PCM,
                        CHANNELS,
                        SAMPLERATE,
                        SAMPLERATE * CHANNELS * BITSPERSAMPLE / 8,
                        CHANNELS * BITSPERSAMPLE / 8,
                        BITSPERSAMPLE,
                        0};

    // Capture buffer description:
    //   dwSize         size of this structure
    //   dwFlags        device capability flags (unused, 0)
    //   dwBufferBytes  buffer size in bytes (one second of audio here)
    //   dwReserved     reserved (0)
    //   lpwfxFormat    capture format as a WAVEFORMATEX
    //   dwFXCount      0 when no effects are used
    //   lpDSCFXDesc    hardware-supported effect descriptions
    DSCBUFFERDESC bufferDescriber = {sizeof(DSCBUFFERDESC), 0,
                                     wfx.nAvgBytesPerSec * 1, 0, &wfx, 0, NULL};

    DWORD readablePos = 0, capturedPos = 0, readBufferPos = 0, lockLength = 0;
    DWORD capturedLength = 0, wrappedCapturedLength = 0;
    DWORD copiedLength = 0, copiedCapacity = 0;
    void *capturedData = NULL, *wrappedCapturedData = NULL;
    char *copiedBuffer = NULL;
    int recordDurationSec = 3; // recording time in seconds
    int exitCode = 1;
    HRESULT Hret;
    time_t start, end;

    (void)argc;
    (void)argv;

    Hret = CoInitialize(NULL);
    if (FAILED(Hret)) {
        printf("CoInitialize error:%lx\n", (unsigned long)Hret);
        return 1;
    }

    Hret = DirectSoundCaptureCreate8(NULL, &captureDevice, NULL);
    if (FAILED(Hret) || captureDevice == NULL) {
        printf("DirectSoundCaptureCreate8 error:%lx\n", (unsigned long)Hret);
        goto cleanup;
    }

    // Use this when there is more than one sound device:
    // DirectSoundEnumerate((LPDSENUMCALLBACK)DSEnumProc, (VOID*)NULL);

    // time() only has one-second resolution, so the loop below can run for up
    // to about recordDurationSec + 1 seconds; keep one more second of slack.
    copiedCapacity = wfx.nAvgBytesPerSec * (DWORD)(recordDurationSec + 2);
    copiedBuffer = (char*)malloc(copiedCapacity);
    if (copiedBuffer == NULL) {
        printf("Out of memory (%lu bytes).\n", (unsigned long)copiedCapacity);
        goto cleanup;
    }

    Hret = captureDevice->CreateCaptureBuffer(&bufferDescriber, &captureBuffer, NULL);
    if (FAILED(Hret) || captureBuffer == NULL) {
        printf("CreateCaptureBuffer error:%lx\n", (unsigned long)Hret);
        goto cleanup;
    }

    start = time(NULL);
    Hret = captureBuffer->Start(DSCBSTART_LOOPING);
    if (FAILED(Hret)) {
        printf("Start error:%lx\n", (unsigned long)Hret);
        goto cleanup;
    }
    Sleep(100); // let the capture advance a little before the first read

    for (;;) {
        Hret = captureBuffer->GetCurrentPosition(&capturedPos, &readablePos);
        if (FAILED(Hret)) {
            printf("GetCurrentPosition error:%lx\n", (unsigned long)Hret);
            break;
        }

        // Equal positions mean there is nothing new to read; locking here
        // would copy the whole ring buffer again.
        if (readablePos != readBufferPos) {
            if (readablePos > readBufferPos) {
                lockLength = readablePos - readBufferPos;
            } else {
                // Readable region wraps around the end of the ring buffer.
                lockLength = bufferDescriber.dwBufferBytes - readBufferPos + readablePos;
            }

            // Reset the outputs so a failed Lock can never leave stale pointers.
            capturedData = NULL;
            wrappedCapturedData = NULL;
            capturedLength = 0;
            wrappedCapturedLength = 0;

            Hret = captureBuffer->Lock(readBufferPos, lockLength,
                                       &capturedData, &capturedLength,
                                       &wrappedCapturedData, &wrappedCapturedLength,
                                       0);
            if (Hret != DS_OK) {
                printf("Lock error:%lx\n", (unsigned long)Hret);
            } else {
                copiedLength = appendCaptured(copiedBuffer, copiedLength, copiedCapacity,
                                              capturedData, capturedLength);
                // Second region is only present when the ring buffer wrapped.
                copiedLength = appendCaptured(copiedBuffer, copiedLength, copiedCapacity,
                                              wrappedCapturedData, wrappedCapturedLength);

                captureBuffer->Unlock(capturedData, capturedLength,
                                      wrappedCapturedData, wrappedCapturedLength);

                readBufferPos = (readBufferPos + capturedLength + wrappedCapturedLength)
                                % bufferDescriber.dwBufferBytes;
            }
        }

        if (copiedLength >= copiedCapacity) {
            printf("Record buffer full, stopping.\n");
            break;
        }

        end = time(NULL);
        if ((end - start) > recordDurationSec) {
            break;
        }
        Sleep(100);
    }

    captureBuffer->Stop();

    printf("%lu bytes recorded.\n", (unsigned long)copiedLength);
    if (writeWaveFile(copiedBuffer, (long)copiedLength) == 0) {
        exitCode = 0;
    }

cleanup:
    if (captureBuffer != NULL) {
        captureBuffer->Release();
    }
    if (captureDevice != NULL) {
        captureDevice->Release();
    }
    free(copiedBuffer);
    CoUninitialize();
    return exitCode;
}