DirectSoundでマイクからの音声入力をキャプチャしてWAVファイルに保存する。

それだけのことなのに、良いサンプルが少なくて苦戦した。

MS公式のドキュメントも読みづらい。

そこで、自分用のメモとしてソースを置いておく。

#include <windows.h>
#include <dsound.h>

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#define SAMPLERATE 44100 // sampling rate in Hz
#define CHANNELS 2 // 1: mono, 2: stereo
#define BITSPERSAMPLE 16 // bits per sample

// MSVC-specific: ask the linker to link against these two libraries.
#pragma comment(lib, "dsound.lib")
#pragma comment(lib, "dxguid.lib")

/*
 * First 12 bytes of a RIFF/WAVE file.
 * The struct is written to disk verbatim, so the size field uses a fixed-width
 * type: `unsigned long` is not 4 bytes on every ABI.
 */
typedef struct {
	char riffID[4];    /* "RIFF", not NUL-terminated */
	uint32_t fileSize; /* number of bytes that follow this field */
	char waveID[4];    /* "WAVE", not NUL-terminated */
} RIFFHeader;

/*
 * "fmt " chunk of a WAVE file for plain PCM data.
 * The struct is written to disk verbatim, so every field uses a fixed-width
 * type (same signedness as before) to keep the 24-byte layout on every ABI.
 */
typedef struct {
	char chunkID[4];           /* "fmt " */
	int32_t chunkSize;         /* size of the fields after this one (16 for PCM) */
	int16_t wFormatTag;        /* format code; 1 is used for PCM */
	uint16_t wChannels;        /* number of channels */
	uint32_t dwSamplesPerSec;  /* sampling rate in Hz */
	uint32_t dwAvgBytesPerSec; /* dwSamplesPerSec * wBlockAlign */
	uint16_t wBlockAlign;      /* bytes per sample frame (all channels) */
	uint16_t wBitsPerSample;   /* bits per single-channel sample */
/* Note: there may be additional fields here, depending upon wFormatTag. */
} FormatChunk;

/*
 * Header of the WAVE "data" chunk; the raw PCM bytes follow it in the file.
 * Written to disk verbatim, so the size uses a fixed-width type.
 */
typedef struct {
	char chunkID[4];   /* "data" */
	int32_t chunkSize; /* number of PCM bytes that follow */
} DataChunk;

/*
 * DirectSound device-enumeration callback: prints the driver name and the
 * description of every enumerated device that has a GUID.
 *
 * lpGUID      - device identifier; entries with a NULL GUID are skipped
 * lpszDesc    - device description
 * lpszDrvName - driver module name
 * lpContext   - caller-supplied context (unused)
 *
 * Always returns TRUE so that the enumeration continues with the next device.
 */
BOOL CALLBACK DSEnumProc(LPGUID lpGUID,  
             LPCTSTR lpszDesc, 
             LPCTSTR lpszDrvName,  
             LPVOID lpContext ) 
{ 
	(void)lpContext;

	if (lpGUID != NULL) { //  NULL only for "Primary Sound Driver". 
#ifdef UNICODE
		// LPCTSTR is a wide string in UNICODE builds; %s would misread it.
		printf("Driver name: %ls, Description:%ls\n", lpszDrvName, lpszDesc);
#else
		printf("Driver name: %s, Description:%s\n", lpszDrvName, lpszDesc);
#endif
	}
	return TRUE;
} 

int writeWaveFile(char* copiedBuffer, long copiedLength) {

	RIFFHeader riffHeader
		= {{'R','I','F','F'},
		sizeof(FormatChunk) + sizeof(DataChunk) + 4 + copiedLength,
		{'W','A','V','E'}};

	FormatChunk formatChunk
		= {{'f','m','t',' '}, 16, 1, CHANNELS, SAMPLERATE,
		SAMPLERATE*CHANNELS*BITSPERSAMPLE/8, CHANNELS*BITSPERSAMPLE/8, BITSPERSAMPLE};

	DataChunk dataChunk = {{'d','a','t','a'}, copiedLength};

	// ファイル出力
	HANDLE hFile = CreateFile("test1.wav" , GENERIC_WRITE , 0 , NULL ,
		CREATE_ALWAYS , FILE_ATTRIBUTE_NORMAL , NULL);
	if (hFile == INVALID_HANDLE_VALUE) {
		MessageBox(NULL , TEXT("ファイルが開けません") , NULL , MB_OK);
		return -1;
	}

	DWORD dwWriteSize;
	WriteFile(hFile, (void*)&riffHeader, sizeof(RIFFHeader), &dwWriteSize, NULL);
	WriteFile(hFile, (void*)&formatChunk, sizeof(FormatChunk), &dwWriteSize, NULL);
	WriteFile(hFile, (void*)&dataChunk, sizeof(DataChunk), &dwWriteSize, NULL);
	WriteFile(hFile, copiedBuffer, copiedLength, &dwWriteSize, NULL);
	CloseHandle(hFile);
	printf("%d bytes written.\n", dwWriteSize);

	return 0;
}

//int _tmain(int argc, char* argv[])
int main(int argc, char* argv[])
{
	LPDIRECTSOUNDCAPTURE captureDevice = NULL;//DirectSoundCaptureDeviceオブジェクト
	LPDIRECTSOUNDCAPTUREBUFFER captureBuffer = NULL;//DirectSoundCaptureBufferオブジェクト

	WAVEFORMATEX wfx = {WAVE_FORMAT_PCM, CHANNELS, SAMPLERATE,
		SAMPLERATE*CHANNELS*BITSPERSAMPLE/8, CHANNELS*BITSPERSAMPLE/8, BITSPERSAMPLE, 0};
	// 単純なPCMのWAVEデータを定義
	// wFormatTag、Waveのフォーマット
	// nChannels モノラル1 ステレオ2(データセットの種類)
	// nSamplesPerSec 1秒あたりのサンプル数
	// mAvgBytesPerSec、1秒あたりのバイト数。nSamplesPerSec*nBlockAlign。
	// nBlockAlign 1サンプルのバイト数。nChannels×wBitsPerSample÷8 8・・・8ビット=1バイト
	// wBitsPerSample 1サンプルあたりのビット数。8か16
	// cbSize 常に0

	DSCBUFFERDESC bufferDescriber
		= {sizeof(DSCBUFFERDESC), 0, wfx.nAvgBytesPerSec*1, 0, &wfx, 0, NULL};
	// DirectSound Capture Buffer DESC キャプチャ バッファを記述する構造体
	// dwSize この構造体のサイズ(=sizeof(DSCBUFFERDESC))
	// dwFlags デバイス付加能力の指定フラグ(未使用につき0)
	// dwBufferBytes バッファサイズ(byte)
	// dwReserved 予約領域(=0)
	// lpwfxFormat キャプチャフォーマットをWAVEFORMATX構造体で指定
	// dwFXCount エフェクトを使用しない場合は0
	// lpDSCFXDesc ハードウェアサポートのエフェクト指定

	DWORD readablePos, capturedPos, readBufferPos, lockLength, capturedLength, wrappedCapturedLength;
	DWORD copiedLength = 0;
	void *capturedData = NULL, *wrappedCapturedData = NULL;
	char *copiedBuffer;
	int recordDurationSec = 3; //録音時間(秒)
	HRESULT Hret;
	time_t start, end;

	CoInitialize(NULL);
	DirectSoundCaptureCreate8( NULL, &captureDevice, NULL );

	// サウンドデバイスが複数ある場合に使用
	// DirectSoundEnumerate((LPDSENUMCALLBACK)DSEnumProc, (VOID*)NULL);

	readBufferPos = 0;
	copiedBuffer = (char*)malloc(
		wfx.nAvgBytesPerSec * wfx.nChannels * wfx.wBitsPerSample / 8 * recordDurationSec * 2); 
	captureDevice->CreateCaptureBuffer(&bufferDescriber,&captureBuffer,NULL);
	start = time(NULL);
	captureBuffer->Start(DSCBSTART_LOOPING);
	Sleep(100); // キャプチャが少し進んでからデータ取得開始
	
	while(1)
	{
		captureBuffer->GetCurrentPosition(&capturedPos, &readablePos);
		if  ( readablePos > readBufferPos ) lockLength = readablePos - readBufferPos;
		else lockLength = bufferDescriber.dwBufferBytes - readBufferPos + readablePos;

		// printf("Lock startRead:%d, readable:%d, locklen:%d, captured:%d\n",
		//	readBufferPos, readablePos, lockLength, capturedPos);
		Hret = captureBuffer->Lock(readBufferPos, lockLength,
			&capturedData, &capturedLength,
			&wrappedCapturedData, &wrappedCapturedLength,
			NULL);
		if( Hret != DS_OK ) {
			printf("Lock error:%x\n", Hret);
		} else {
			// printf("buffer read, buf1:%d, buf2:%d\n", capturedLength, wrappedCapturedLength);
		}

		if (capturedData != NULL) {
			memcpy(copiedBuffer+copiedLength, capturedData, capturedLength);
			copiedLength += capturedLength;
			readBufferPos += capturedLength;
			if (readBufferPos >= bufferDescriber.dwBufferBytes)
				readBufferPos = 0;
		}

		if (wrappedCapturedData != NULL) { // Ring buffer wrapped
			memcpy(copiedBuffer+copiedLength, wrappedCapturedData, wrappedCapturedLength);
			copiedLength += wrappedCapturedLength;
			readBufferPos = wrappedCapturedLength;
		}

		Hret = captureBuffer->Unlock( capturedData, capturedLength,
			wrappedCapturedData, wrappedCapturedLength);

		end = time(NULL);
		if((end-start) > recordDurationSec ){
			break;
		}
		Sleep(100);
	}
	printf("%d bytes recorded.\n", copiedLength);
	writeWaveFile(copiedBuffer, copiedLength);
	captureBuffer->Stop();
	free(copiedBuffer);
	CoUninitialize();

	return 0;
}