1. Create the decoding graph and insert your DirectShow texture renderer filter, which accepts RGB or RGBA pixel formats.
2. In SetMediaType inspect VIDEOINFOHEADER and get video resolution.
3. Create 2 PBO's with video resolution framesize. Create texture too.
4. Lock 1st PBO (glMapBuffer) and pass pointer to dshow texture renderer. DO NOT UNLOCK NOW!!!
5. Run graph (using IMediaControl::Run)
6. When DoRenderSample occurs, copy the uncompressed frame into PBO memory using the pointer from step 4, and notify the main render thread that a new frame has arrived.
7. In main render thread, during update check for new frame notification and unlock PBO buffer, then lock 2nd buffer and pass it to dshow tex renderer. Then you can upload texture using glTexSubImage2D call from 1st PBO. This is a simple double-buffering.
8. On single core CPU add some Sleep in main render thread to leave CPU power to directshow decoding graph. Otherwise, playback might be jerky.
9. Add a critical section object to control access to the PBO pointer in the dshow tex renderer, to avoid a potential crash if the main thread changes the pointer in the middle of the copy in DoRenderSample.
10. Try to use a fast memory-copy function to improve performance.

// Creates the pixel buffer object and reserves storage for one frame.
//
//   sx, sy     frame dimensions; stored in m_sx / m_sy
//   pixbytes   bytes per pixel; the buffer is sized sx * sy * pixbytes
//   pixformat  pixel format later handed to glTexSubImage2D by Upload()
//   pixtype    pixel component type later handed to glTexSubImage2D by Upload()
//   usage      usage hint forwarded to glBufferData
void	CPBO::init(	int sx, int sy, int pixbytes, 
					GLenum pixformat, GLenum  pixtype , 
					GLenum usage )
{
	// Remember the frame description; Upload() reads these back later.
	m_sx        = sx;
	m_sy        = sy;
	m_pixbytes  = pixbytes;
	m_pixformat = pixformat;
	m_pixtype   = pixtype;
	m_usage     = usage;

	// Size of one whole frame in bytes.
	const int frameBytes = sx * sy * pixbytes;

	glGenBuffers(1, &m_pboID);

	// Allocate the storage without supplying initial data (NULL), then
	// restore the default unpack binding so later texture uploads are not
	// accidentally sourced from this buffer.
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, m_pboID);
	glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, frameBytes, NULL, usage);
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}


void*	CPBO::LockGetPointer()
{
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, m_pboID);
	void* memio = glMapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, GL_WRITE_ONLY);
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
	return memio;
}


void	CPBO::Unlock( )
{
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, m_pboID);
	glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER_ARB);
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}

void	CPBO::Upload( )
{
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, m_pboID);
	glTexSubImage2D(GL_TEXTURE_RECTANGLE_EXT, 0, 0, 0, m_sx, m_sy, 
					m_pixformat, m_pixtype, BUFFER_OFFSET(0));
	glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, 0);
}



// Copies one decoded video frame from the DirectShow sample into the buffer
// published through m_pTxtBuffer and raises m_bSampleReady so the consumer
// can pick the frame up.
//
//   pSample  media sample holding the uncompressed frame
//
// Returns:
//   E_POINTER  if pSample is NULL or the sample exposes no data pointer
//   a failure HRESULT from IMediaSample::GetPointer, passed through
//   S_OK       otherwise, including when no destination buffer is set
//              (the frame is dropped and m_bSampleReady is left untouched)
//
// Every exit path taken after EnterCriticalSection releases the lock again.
HRESULT CTextureRenderer::DoRenderSample( IMediaSample * pSample )
{
	// Validate and query the sample before taking the lock, so that none of
	// these early returns can leave the critical section held.
	CheckPointer(pSample,E_POINTER);

	// Get the video bitmap buffer
	BYTE* pBmpBuffer = NULL;
	const HRESULT hr = pSample->GetPointer( &pBmpBuffer );
	if (FAILED(hr))		return hr;
	if (!pBmpBuffer)	return E_POINTER;

	// wait for main thread to consume previous sample
	// NOTE(review): this is a busy-wait. Sleep(0) only yields to threads of
	// equal or higher priority, and the loop never ends if the consumer stops
	// clearing the flag. Consider an event object instead.
	while(m_bSampleReady)	Sleep(0);

	EnterCriticalSection(&m_CriticalSection);

	BYTE* const pTxtBuffer = m_pTxtBuffer;
	if (!pTxtBuffer)
	{
		// Release the lock first, then log; file I/O does not need the lock.
		LeaveCriticalSection(&m_CriticalSection);
		AppendFile("debug.txt","no texture buffer for DoRenderSample()\n");
		return S_OK;
	}

	// Bytes per pixel for the formats handled here; any other format copies
	// nothing, as before.
	LONG lBytesPerPixel = 0;
	if (m_TextureFormat == GL_BGR)			lBytesPerPixel = 3;
	else if (m_TextureFormat == GL_BGRA)	lBytesPerPixel = 4;

	if (lBytesPerPixel != 0)
	{
		// Never read past the data the sample actually carries.
		LONG lCopyBytes = m_lVidWidth*m_lVidHeight*lBytesPerPixel;
		const LONG lAvailable = pSample->GetActualDataLength();
		if (lAvailable < lCopyBytes)	lCopyBytes = lAvailable;

		// Copy the bits: destination is the texture buffer, source is the
		// decoded sample.
		if (lCopyBytes > 0)
			memcpy(pTxtBuffer, pBmpBuffer, lCopyBytes);
	}

	m_bSampleReady = true;
	LeaveCriticalSection(&m_CriticalSection);

	// Yield briefly, outside the lock, so the consumer is not kept waiting
	// on the critical section while this thread sleeps.
	Sleep(1);

	return S_OK;
}

    
// swap buffers if needed
// Fragment from the render-thread update; the enclosing function is not shown.
// m_bSampleReady is the flag CTextureRenderer::DoRenderSample raises after it
// has handled a sample (presumably m_VideoRenderer is that renderer - confirm).
	if (m_VideoRenderer->m_bSampleReady)
	{
		// The renderer's destination pointer is changed only while holding
		// the same critical section that DoRenderSample takes.
		EnterCriticalSection(&m_VideoRenderer->m_CriticalSection);
			// unlock previous pbo
			// (it must be unmapped before Upload() below reads from it)
			m_PBO[m_PBO_upload_idx].Unlock();
			// get pointer to empty buf and clear sample flag
			// NOTE(review): LockGetPointer() returns the raw glMapBuffer result,
			// which may be NULL; it is stored without a check.
			m_VideoRenderer->m_pTxtBuffer = (BYTE*) m_PBO[m_PBO_render_idx].LockGetPointer();
			m_VideoRenderer->m_bSampleReady = false;
		LeaveCriticalSection(&m_VideoRenderer->m_CriticalSection);

		// upload texture to videocard
		// Upload() writes into the currently bound rectangle texture, so bind first.
		glBindTexture(GL_TEXTURE_RECTANGLE_EXT, m_texid);
		m_PBO[m_PBO_upload_idx].Upload();

		// swapped pbo's so swap idxs too
		// Any nonzero upload index becomes 0/1, otherwise 1/0.
		if (m_PBO_upload_idx)		{ m_PBO_upload_idx=0; m_PBO_render_idx=1; }
		else						{ m_PBO_upload_idx=1; m_PBO_render_idx=0; }
	}
    
	
	// Fragment from the per-frame draw code; the enclosing function is not shown.
	// NOTE(review): CheckMovieStatus() is defined elsewhere; presumably it is
	// where the pending-frame check and PBO swap happen - confirm.
	gVidTex.CheckMovieStatus();	
	// draw video on quad
	CCamera::SetOrtho2D();
	glEnable(GL_TEXTURE_RECTANGLE_EXT);
	glBindTexture(GL_TEXTURE_RECTANGLE_EXT,  gVidTex.m_texid);
	// Quad is given the video width/height taken from the renderer.
	// NOTE(review): meaning of the trailing 0.3,0.3,1,1 arguments depends on
	// DrawTexQuad's signature, which is not visible here - confirm.
	DrawTexQuad(0,0,gVidTex.m_VideoRenderer->m_lVidWidth, gVidTex.m_VideoRenderer->m_lVidHeight,
					0.3,0.3,1,1);

  I just wanted to comment on the use of Sleep(0).

Unless you know exactly what you are doing, don't do this. The reason is that Sleep(0) only gives up the remainder of the current thread's time-slice if there is a thread with the same or higher priority waiting to run.

This is something ATI at least used to do in their ICD, and it took me literally days to hunt down when my app locked up for several seconds at a time. Basically, they used a "poor man's critical section" in the hope that busy-waiting would gain a nanosecond here or there, while completely locking up a uniprocessor system whenever the window-managing thread was running at a different priority than the rendering thread.

Please don't make the same mistake. Busy-waiting is usually bad.

To display two alternatives (as this is platform dependent):

- If you need to get notified ASAP, use an event. Just before you wait for the event, raise the thread priority (to a priority higher than the thread you're waiting for). When wait returns, immediately lower your thread priority back. Using this, you will get notified immediately when the event is signalled (I'm fairly certain it will in fact, on a uni-CPU system, be faster than the busy-waiting approach).

- Use a critical section as both synchronizing primitive and the signal (using the same raise/revert thread priority idea), but prepend the call to enter the critical section with a call to TryEnterCriticalSection. On a good day that'll save the three kernel calls to raise/lower thread pri. and waiting for the mutex.
