Contents
- 1. Fetching video frames from the camera (CaptureInputPin::Receive())
- 2. Processing the captured frame (CaptureSinkFilter::ProcessCapturedFrame())
- 3. Processing the Windows-layer frame and dispatching it to a frame handler (VideoCaptureImpl::IncomingFrame())
- 4. Frame handlers (VideoCaptureImpl::DeliverCapturedFrame and DeliverRawFrame)
- Example: the callback used by the encoder
This post walks through the main flow of capturing video frames from a camera on the Windows platform, where WebRTC uses the DirectShow framework.
1. Fetching video frames from the camera (CaptureInputPin::Receive())
The function that receives video frames from the camera is CaptureInputPin::Receive(), defined in modules/video_capture/windows/sink_filter_ds.cc. It pulls frames from the underlying camera device and hands them upward by calling ProcessCapturedFrame().
COM_DECLSPEC_NOTHROW STDMETHODIMP
CaptureInputPin::Receive(IMediaSample* media_sample) {
  // Check that this code runs on the capture thread.
  RTC_DCHECK_RUN_ON(&capture_checker_);
  CaptureSinkFilter* const filter = static_cast<CaptureSinkFilter*>(Filter());

  // Bail out if a flush is in progress.
  if (flushing_.load(std::memory_order_relaxed))
    return S_FALSE;

  // Bail out if a runtime error has occurred.
  if (runtime_error_.load(std::memory_order_relaxed))
    return VFW_E_RUNTIME_ERROR;

  // No capture thread ID recorded yet; fetch it now.
  if (!capture_thread_id_) {
    // Make sure we set the thread name only once.
    capture_thread_id_ = GetCurrentThreadId();
    rtc::SetCurrentThreadName("webrtc_video_capture");
  }

  AM_SAMPLE2_PROPERTIES sample_props = {};
  // Read the sample properties.
  GetSampleProperties(media_sample, &sample_props);

  // Has the format changed in this sample?
  if (sample_props.dwSampleFlags & AM_SAMPLE_TYPECHANGED) {
    // Check the derived class accepts the new format.
    // This shouldn't fail as the source must call QueryAccept first.
    // Note: This will modify resulting_capability_.
    // That should be OK as long as resulting_capability_ is only modified
    // on this thread while it is running (filter is not stopped), and only
    // modified on the main thread when the filter is stopped (i.e. this
    // thread is not running).
    if (!TranslateMediaTypeToVideoCaptureCapability(sample_props.pMediaType,
                                                    &resulting_capability_)) {
      // Raise a runtime error if we fail the media type.
      runtime_error_ = true;
      EndOfStream();
      Filter()->NotifyEvent(EC_ERRORABORT, VFW_E_TYPE_NOT_ACCEPTED, 0);
      return VFW_E_INVALIDMEDIATYPE;
    }
  }

  // Hand the captured frame over to the filter.
  filter->ProcessCapturedFrame(sample_props.pbBuffer, sample_props.lActual,
                               resulting_capability_);

  return S_OK;
}
GetSampleProperties() is implemented as follows:
void GetSampleProperties(IMediaSample* sample, AM_SAMPLE2_PROPERTIES* props) {
  rtc::scoped_refptr<IMediaSample2> sample2;
  // If the sample also implements IMediaSample2, read the properties
  // directly in one call.
  if (SUCCEEDED(GetComInterface(sample, &sample2))) {
    sample2->GetProperties(sizeof(*props), reinterpret_cast<BYTE*>(props));
    return;
  }

  // Get the properties the hard way.
  props->cbData = sizeof(*props);
  // Type-specific flags or options associated with the media type.
  props->dwTypeSpecificFlags = 0;
  // Stream ID. AM_STREAM_MEDIA is the predefined value for a media stream.
  props->dwStreamId = AM_STREAM_MEDIA;
  // Per-sample flags (key frame, sync point, etc.). Start with none set.
  props->dwSampleFlags = 0;

  // Discontinuity in the stream (e.g. a scene cut or dropped data)?
  if (sample->IsDiscontinuity() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_DATADISCONTINUITY;

  // Is this a preroll sample? Preroll samples are processed but not
  // displayed; they are used to get audio/video in sync before playback.
  if (sample->IsPreroll() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_PREROLL;

  // Is this sample a sync point, i.e. a point playback can start from?
  if (sample->IsSyncPoint() == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_SPLICEPOINT;

  // Try to read the sample's start and stop times.
  if (SUCCEEDED(sample->GetTime(&props->tStart, &props->tStop)))
    props->dwSampleFlags |= AM_SAMPLE_TIMEVALID | AM_SAMPLE_STOPVALID;

  // Did the media type change with this sample?
  if (sample->GetMediaType(&props->pMediaType) == S_OK)
    props->dwSampleFlags |= AM_SAMPLE_TYPECHANGED;

  // Pointer to the sample's data buffer.
  sample->GetPointer(&props->pbBuffer);
  // Actual length of the data in the buffer.
  props->lActual = sample->GetActualDataLength();
  // Total size of the buffer.
  props->cbBuffer = sample->GetSize();
}
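GetComInterface() is a small WebRTC helper around COM's QueryInterface(): the fast path above only works when the sample object also implements IMediaSample2, which is exactly what QueryInterface() probes for. A rough sketch of that pattern (the helper name and ComPtr usage here are mine, not WebRTC's code):

#include <dshow.h>
#include <wrl/client.h>

// Sketch of the GetComInterface pattern: ask a COM object whether it also
// implements a second interface. On S_OK the ComPtr holds an AddRef'd
// reference; on failure the caller falls back to the IMediaSample methods.
HRESULT QuerySample2(IMediaSample* sample,
                     Microsoft::WRL::ComPtr<IMediaSample2>* sample2) {
  return sample->QueryInterface(IID_PPV_ARGS(sample2->GetAddressOf()));
}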
TranslateMediaTypeToVideoCaptureCapability() is implemented as follows:
// Returns true if the media type is supported, false otherwise.
// For supported types, the `capability` will be populated accordingly.
bool TranslateMediaTypeToVideoCaptureCapability(
    const AM_MEDIA_TYPE* media_type,
    VideoCaptureCapability* capability) {
  RTC_DCHECK(capability);
  if (!media_type || media_type->majortype != MEDIATYPE_Video ||
      !media_type->pbFormat) {
    return false;
  }

  const BITMAPINFOHEADER* bih = nullptr;
  // FORMAT_VideoInfo corresponds to the VIDEOINFOHEADER structure:
  // non-interlaced video only, with no picture aspect ratio information.
  // FORMAT_VideoInfo2 corresponds to VIDEOINFOHEADER2, an extension of
  // VIDEOINFOHEADER that adds support for interlaced video and picture
  // aspect ratio, allowing finer control over playback and processing.
  if (media_type->formattype == FORMAT_VideoInfo) {
    bih = &reinterpret_cast<VIDEOINFOHEADER*>(media_type->pbFormat)->bmiHeader;
  } else if (media_type->formattype == FORMAT_VideoInfo2) {
    bih = &reinterpret_cast<VIDEOINFOHEADER2*>(media_type->pbFormat)->bmiHeader;
  } else {
    return false;
  }

  RTC_LOG(LS_INFO) << "TranslateMediaTypeToVideoCaptureCapability width:"
                   << bih->biWidth << " height:" << bih->biHeight
                   << " Compression:0x" << rtc::ToHex(bih->biCompression);

  const GUID& sub_type = media_type->subtype;
  // Map the subtype / FOURCC pair to a VideoType.
  if (sub_type == MEDIASUBTYPE_MJPG &&
      bih->biCompression == MAKEFOURCC('M', 'J', 'P', 'G')) {
    capability->videoType = VideoType::kMJPEG;
  } else if (sub_type == MEDIASUBTYPE_I420 &&
             bih->biCompression == MAKEFOURCC('I', '4', '2', '0')) {
    capability->videoType = VideoType::kI420;
  } else if (sub_type == MEDIASUBTYPE_YUY2 &&
             bih->biCompression == MAKEFOURCC('Y', 'U', 'Y', '2')) {
    capability->videoType = VideoType::kYUY2;
  } else if (sub_type == MEDIASUBTYPE_UYVY &&
             bih->biCompression == MAKEFOURCC('U', 'Y', 'V', 'Y')) {
    capability->videoType = VideoType::kUYVY;
  } else if (sub_type == MEDIASUBTYPE_HDYC) {
    capability->videoType = VideoType::kUYVY;
  } else if (sub_type == MEDIASUBTYPE_RGB24 && bih->biCompression == BI_RGB) {
    capability->videoType = VideoType::kRGB24;
  } else {
    return false;
  }

  // Store the incoming width and height.
  capability->width = bih->biWidth;

  // Store the incoming height;
  // for RGB24 we assume the frame to be upside down.
  if (sub_type == MEDIASUBTYPE_RGB24 && bih->biHeight > 0) {
    capability->height = -(bih->biHeight);
  } else {
    capability->height = abs(bih->biHeight);
  }

  return true;
}
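The biCompression comparisons use FOURCC codes: MAKEFOURCC packs four ASCII characters into a little-endian 32-bit value, first character in the lowest byte. A small portable sketch of the same packing (the function name is mine; the real macro is defined by the Windows SDK):

#include <cstdint>
#include <cstdio>

// Portable equivalent of the Windows MAKEFOURCC macro.
constexpr uint32_t MakeFourCc(char a, char b, char c, char d) {
  return static_cast<uint32_t>(static_cast<uint8_t>(a)) |
         (static_cast<uint32_t>(static_cast<uint8_t>(b)) << 8) |
         (static_cast<uint32_t>(static_cast<uint8_t>(c)) << 16) |
         (static_cast<uint32_t>(static_cast<uint8_t>(d)) << 24);
}

int main() {
  // 'MJPG' -> 0x47504A4D: 'M' (0x4D) in the lowest byte, 'G' (0x47) highest.
  printf("MJPG = 0x%08X\n", MakeFourCc('M', 'J', 'P', 'G'));
  static_assert(MakeFourCc('I', '4', '2', '0') == 0x30323449, "I420");
  return 0;
}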
2. Processing the captured frame (CaptureSinkFilter::ProcessCapturedFrame())
The previous function talks directly to the Windows platform; this one is the thin relay between the Windows layer and the platform-independent layer above, and it still belongs to the Windows layer. As the code shows, it simply calls IncomingFrame() to pass the captured frame upward. ProcessCapturedFrame() is defined in modules/video_capture/windows/sink_filter_ds.cc.
void CaptureSinkFilter::ProcessCapturedFrame(
    unsigned char* buffer,
    size_t length,
    const VideoCaptureCapability& frame_info) {
  // Called on the capture thread.
  capture_observer_->IncomingFrame(buffer, length, frame_info);
}
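capture_observer_ is how the DirectShow layer hands frames to the platform-independent layer: VideoCaptureImpl registers itself as the observer and receives each buffer in IncomingFrame(). The shape of that contract is roughly the following (a simplified, hypothetical declaration for illustration; in WebRTC the corresponding interface is VideoCaptureExternal under modules/video_capture):

#include <cstddef>
#include <cstdint>

struct VideoCaptureCapability;  // WebRTC's capture format descriptor.

// Hypothetical, simplified observer contract: the sink filter only needs a
// way to push a raw buffer plus its format description to the upper layer.
class CaptureFrameObserver {
 public:
  virtual ~CaptureFrameObserver() = default;
  // Called on the capture thread for every frame delivered by the camera.
  virtual int32_t IncomingFrame(uint8_t* video_frame,
                                size_t video_frame_length,
                                const VideoCaptureCapability& frame_info,
                                int64_t capture_time_ms = 0) = 0;
};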
3. Processing the Windows-layer frame and dispatching it to a frame handler (VideoCaptureImpl::IncomingFrame())
Once the Windows layer has produced a frame, VideoCaptureImpl processes it: it parses some metadata and dispatches the frame to the appropriate handler. There are two kinds of frames: a raw frame, which has not been converted, and a converted frame (normally in I420 format).
int32_t VideoCaptureImpl::IncomingFrame(uint8_t* videoFrame,
                                        size_t videoFrameLength,
                                        const VideoCaptureCapability& frameInfo,
                                        int64_t captureTime /*=0*/) {
  // Check that this code runs on the expected sequence.
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  MutexLock lock(&api_lock_);

  const int32_t width = frameInfo.width;
  const int32_t height = frameInfo.height;

  TRACE_EVENT1("webrtc", "VC::IncomingFrame", "capture_time", captureTime);

  // If a raw-data callback is registered, deliver the unconverted frame.
  if (_rawDataCallBack) {
    DeliverRawFrame(videoFrame, videoFrameLength, frameInfo, captureTime);
    return 0;
  }

  // Not encoded, convert to I420.
  if (frameInfo.videoType != VideoType::kMJPEG) {
    // Allow buffers larger than expected. On linux gstreamer allocates
    // buffers page-aligned and v4l2loopback passes us the buffer size
    // verbatim which for most cases is larger than expected.
    // See https://github.com/umlaeute/v4l2loopback/issues/190.
    // Check that the received data length is plausible.
    if (auto size = CalcBufferSize(frameInfo.videoType, width, abs(height));
        videoFrameLength < size) {
      RTC_LOG(LS_ERROR) << "Wrong incoming frame length. Expected " << size
                        << ", Got " << videoFrameLength << ".";
      return -1;
    }
  }

  int stride_y = width;
  int stride_uv = (width + 1) / 2;
  int target_width = width;
  int target_height = abs(height);

  // Apply the rotation, if requested.
  if (apply_rotation_) {
    // Swap width and height for 90/270 degree rotations.
    if (_rotateFrame == kVideoRotation_90 ||
        _rotateFrame == kVideoRotation_270) {
      target_width = abs(height);
      target_height = width;
    }
  }

  // Setting absolute height (in case it was negative).
  // In Windows, the image starts bottom left, instead of top left.
  // Setting a negative source height, inverts the image (within LibYuv).
  rtc::scoped_refptr<I420Buffer> buffer = I420Buffer::Create(
      target_width, target_height, stride_y, stride_uv, stride_uv);

  libyuv::RotationMode rotation_mode = libyuv::kRotate0;
  // Map the requested rotation to libyuv's rotation mode.
  if (apply_rotation_) {
    switch (_rotateFrame) {
      case kVideoRotation_0:
        rotation_mode = libyuv::kRotate0;
        break;
      case kVideoRotation_90:
        rotation_mode = libyuv::kRotate90;
        break;
      case kVideoRotation_180:
        rotation_mode = libyuv::kRotate180;
        break;
      case kVideoRotation_270:
        rotation_mode = libyuv::kRotate270;
        break;
    }
  }

  // Convert the image to I420 using libyuv's conversion routine.
  const int conversionResult = libyuv::ConvertToI420(
      videoFrame, videoFrameLength, buffer.get()->MutableDataY(),
      buffer.get()->StrideY(), buffer.get()->MutableDataU(),
      buffer.get()->StrideU(), buffer.get()->MutableDataV(),
      buffer.get()->StrideV(), 0, 0,  // No Cropping
      width, height, target_width, target_height, rotation_mode,
      ConvertVideoType(frameInfo.videoType));
  if (conversionResult != 0) {
    RTC_LOG(LS_ERROR) << "Failed to convert capture frame from type "
                      << static_cast<int>(frameInfo.videoType) << " to I420.";
    return -1;
  }

  // Build the VideoFrame.
  VideoFrame captureFrame =
      VideoFrame::Builder()
          .set_video_frame_buffer(buffer)
          .set_rtp_timestamp(0)
          .set_timestamp_ms(rtc::TimeMillis())
          .set_rotation(!apply_rotation_ ? _rotateFrame : kVideoRotation_0)
          .build();
  captureFrame.set_ntp_time_ms(captureTime);

  // Pass the converted I420 frame to the upper layer.
  DeliverCapturedFrame(captureFrame);

  return 0;
}
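The CalcBufferSize() check above guards against short buffers. For I420 the required size is width*height luma bytes plus two chroma planes subsampled by two in each dimension, i.e. roughly width*height*3/2. A self-contained sketch of that arithmetic (the helper name is mine, not WebRTC's CalcBufferSize):

#include <cstddef>

// Expected byte size of one I420 frame: a full-resolution Y plane plus
// U and V planes subsampled by two in both dimensions.
constexpr size_t I420BufferSize(int width, int height) {
  const size_t y = static_cast<size_t>(width) * height;
  const size_t chroma =
      static_cast<size_t>((width + 1) / 2) * ((height + 1) / 2);
  return y + 2 * chroma;
}

// 640x480: 307200 Y bytes + 2 * 76800 chroma bytes = 460800.
static_assert(I420BufferSize(640, 480) == 460800, "VGA I420 size");
// Odd dimensions round the chroma planes up, matching the (w+1)/2 strides.
static_assert(I420BufferSize(3, 3) == 9 + 2 * 4, "odd-sized frame");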
4. Frame handlers (VideoCaptureImpl::DeliverCapturedFrame and DeliverRawFrame)
Depending on the frame there are two paths: raw frames (not in I420 format) and converted frames (I420). Raw frames go through DeliverRawFrame(); converted frames go through DeliverCapturedFrame().
void VideoCaptureImpl::DeliverRawFrame(uint8_t* videoFrame,
                                       size_t videoFrameLength,
                                       const VideoCaptureCapability& frameInfo,
                                       int64_t captureTime) {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  // Update the frame count.
  UpdateFrameCount();
  // Hand the raw frame to the registered raw-data callback.
  _rawDataCallBack->OnRawFrame(videoFrame, videoFrameLength, frameInfo,
                               _rotateFrame, captureTime);
}
int32_t VideoCaptureImpl::DeliverCapturedFrame(VideoFrame& captureFrame) {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  // Update the frame count.
  UpdateFrameCount();  // frame count used for local frame rate callback.
  // Hand the converted frame to the registered callback.
  if (_dataCallBack) {
    _dataCallBack->OnFrame(captureFrame);
  }
  return 0;
}
Both functions call UpdateFrameCount(), which maintains a history of incoming-frame arrival timestamps (used for the local frame-rate callback):
void VideoCaptureImpl::UpdateFrameCount() {
  RTC_CHECK_RUNS_SERIALIZED(&capture_checker_);
  if (_incomingFrameTimesNanos[0] / rtc::kNumNanosecsPerMicrosec == 0) {
    // First incoming frame: no shift needed.
  } else {
    // Shift the history by one slot to make room for the new timestamp.
    for (int i = (kFrameRateCountHistorySize - 2); i >= 0; --i) {
      _incomingFrameTimesNanos[i + 1] = _incomingFrameTimesNanos[i];
    }
  }
  // Record the current time for the newest frame.
  _incomingFrameTimesNanos[0] = rtc::TimeNanos();
}
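This history is what the local frame-rate estimate is computed from: count how many stored arrival times fall within the last second. WebRTC implements this in VideoCaptureImpl (CalculateFrameRate); the sketch below is a simplified illustration of the same idea, with my own names and constants:

#include <cstdint>

// Simplified frame-rate estimate over a history of frame arrival times
// (newest first; zero-initialized slots mean "no frame recorded yet").
// Illustrative only; not WebRTC's exact code.
constexpr int kHistorySize = 90;

uint32_t EstimateFps(const int64_t (&times_nanos)[kHistorySize],
                     int64_t now_nanos) {
  constexpr int64_t kNanosPerSecond = 1000000000;
  uint32_t frames_in_last_second = 0;
  for (int i = 0; i < kHistorySize; ++i) {
    if (times_nanos[i] == 0 ||
        now_nanos - times_nanos[i] > kNanosPerSecond) {
      break;  // Older than one second (or an empty slot): stop counting.
    }
    ++frames_in_last_second;
  }
  return frames_in_last_second;
}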
With the flow above, the video frame has been obtained; from here it can be rendered, encoded, and so on. OnFrame() and OnRawFrame() are callbacks chosen by the consumer. In VideoCaptureImpl, callbacks are registered with RegisterCaptureDataCallback(), an overloaded function that can register either _dataCallBack or _rawDataCallBack.
void VideoCaptureImpl::RegisterCaptureDataCallback(
    rtc::VideoSinkInterface<VideoFrame>* dataCallBack) {
  MutexLock lock(&api_lock_);
  RTC_DCHECK(!_rawDataCallBack);
  // Register the callback for converted (I420) frames.
  _dataCallBack = dataCallBack;
}

void VideoCaptureImpl::RegisterCaptureDataCallback(
    RawVideoSinkInterface* dataCallBack) {
  MutexLock lock(&api_lock_);
  RTC_DCHECK(!_dataCallBack);
  // Register the callback for raw (unconverted) frames.
  _rawDataCallBack = dataCallBack;
}
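To receive the converted frames, a consumer implements rtc::VideoSinkInterface<VideoFrame> and registers itself. A minimal sketch (the class name and logging are mine; how the capture module itself is created varies by platform and WebRTC version):

#include <cstdio>

#include "api/video/video_frame.h"
#include "api/video/video_sink_interface.h"

// Hypothetical sink that just logs each delivered frame's resolution.
class LoggingVideoSink : public rtc::VideoSinkInterface<webrtc::VideoFrame> {
 public:
  void OnFrame(const webrtc::VideoFrame& frame) override {
    // Called on the capture thread; keep this handler fast.
    printf("Got frame %dx%d\n", frame.width(), frame.height());
  }
};

// Usage (sketch): given an already-created capture module,
//   LoggingVideoSink sink;
//   capture_module->RegisterCaptureDataCallback(&sink);
//   // ...start capture; DeliverCapturedFrame() will then invoke OnFrame().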
Example: the callback used by the encoder
If the captured frame is going to be encoded, the OnFrame() below is invoked. It fixes up timestamp information and checks the congestion window and whether the encoder is blocked; if everything is in order, it calls MaybeEncodeVideoFrame() to run the frame through the encoding pipeline.
void VideoStreamEncoder::OnFrame(Timestamp post_time,
                                 bool queue_overload,
                                 const VideoFrame& video_frame) {
  RTC_DCHECK_RUN_ON(encoder_queue_.get());
  VideoFrame incoming_frame = video_frame;

  // In some cases, e.g., when the frame from decoder is fed to encoder,
  // the timestamp may be set to the future. As the encoding pipeline assumes
  // capture time to be less than present time, we should reset the capture
  // timestamps here. Otherwise there may be issues with RTP send stream.
  if (incoming_frame.timestamp_us() > post_time.us())
    incoming_frame.set_timestamp_us(post_time.us());

  // Capture time may come from clock with an offset and drift from clock_.
  // NTP: Network Time Protocol; RTP: Real-time Transport Protocol.
  int64_t capture_ntp_time_ms;
  if (video_frame.ntp_time_ms() > 0) {
    capture_ntp_time_ms = video_frame.ntp_time_ms();
  } else if (video_frame.render_time_ms() != 0) {
    capture_ntp_time_ms = video_frame.render_time_ms() + delta_ntp_internal_ms_;
  } else {
    capture_ntp_time_ms = post_time.ms() + delta_ntp_internal_ms_;
  }
  incoming_frame.set_ntp_time_ms(capture_ntp_time_ms);

  // Convert NTP time, in ms, to RTP timestamp.
  const int kMsToRtpTimestamp = 90;
  incoming_frame.set_rtp_timestamp(
      kMsToRtpTimestamp * static_cast<uint32_t>(incoming_frame.ntp_time_ms()));

  // Identifier should remain the same for newly produced incoming frame and
  // the received |video_frame|.
  incoming_frame.set_presentation_timestamp(
      video_frame.presentation_timestamp());

  // Drop the frame if its NTP timestamp is not newer than the last one's,
  // so that timestamps are strictly increasing over time.
  if (incoming_frame.ntp_time_ms() <= last_captured_timestamp_) {
    // We don't allow the same capture time for two frames, drop this one.
    RTC_LOG(LS_WARNING) << "Same/old NTP timestamp ("
                        << incoming_frame.ntp_time_ms()
                        << " <= " << last_captured_timestamp_
                        << ") for incoming frame. Dropping.";
    ProcessDroppedFrame(incoming_frame,
                        VideoStreamEncoderObserver::DropReason::kBadTimestamp);
    return;
  }

  bool log_stats = false;
  if (post_time.ms() - last_frame_log_ms_ > kFrameLogIntervalMs) {
    last_frame_log_ms_ = post_time.ms();
    log_stats = true;
  }

  last_captured_timestamp_ = incoming_frame.ntp_time_ms();

  // Report the new incoming frame to the stats observer.
  encoder_stats_observer_->OnIncomingFrame(incoming_frame.width(),
                                           incoming_frame.height());

  // Frame instrumentation, if enabled.
  if (frame_instrumentation_generator_) {
    frame_instrumentation_generator_->OnCapturedFrame(incoming_frame);
  }

  // Count the captured frame.
  ++captured_frame_count_;
  // Should this frame be dropped due to congestion window pushback?
  bool cwnd_frame_drop =
      cwnd_frame_drop_interval_ &&
      (cwnd_frame_counter_++ % cwnd_frame_drop_interval_.value() == 0);
  // If the queue is not overloaded and the congestion window does not force
  // a drop, the frame may be encoded.
  if (!queue_overload && !cwnd_frame_drop) {
    MaybeEncodeVideoFrame(incoming_frame, post_time.us());
  } else {
    if (cwnd_frame_drop) {
      // Frame drop by congestion window pushback. Do not encode this frame.
      ++dropped_frame_cwnd_pushback_count_;
    } else {
      // There is a newer frame in flight. Do not encode this frame.
      RTC_LOG(LS_VERBOSE)
          << "Incoming frame dropped due to that the encoder is blocked.";
      ++dropped_frame_encoder_block_count_;
    }
    // Drop the current frame.
    ProcessDroppedFrame(
        incoming_frame,
        cwnd_frame_drop
            ? VideoStreamEncoderObserver::DropReason::kCongestionWindow
            : VideoStreamEncoderObserver::DropReason::kEncoderQueue);
  }
  // Periodically log frame statistics.
  if (log_stats) {
    RTC_LOG(LS_INFO) << "Number of frames: captured " << captured_frame_count_
                     << ", dropped (due to congestion window pushback) "
                     << dropped_frame_cwnd_pushback_count_
                     << ", dropped (due to encoder blocked) "
                     << dropped_frame_encoder_block_count_ << ", interval_ms "
                     << kFrameLogIntervalMs;
    captured_frame_count_ = 0;
    dropped_frame_cwnd_pushback_count_ = 0;
    dropped_frame_encoder_block_count_ = 0;
  }
}
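The constant kMsToRtpTimestamp = 90 comes from video's 90 kHz RTP media clock: 90,000 ticks per second is 90 ticks per millisecond, so an NTP time in ms maps to an RTP timestamp by multiplying by 90, with natural 32-bit wraparound. A small self-contained check of that arithmetic:

#include <cstdint>
#include <cstdio>

// Video RTP timestamps use a 90 kHz clock: 90000 ticks/s = 90 ticks/ms.
// The cast makes the 32-bit wraparound explicit.
uint32_t NtpMsToRtpTimestamp(int64_t ntp_time_ms) {
  const int kMsToRtpTimestamp = 90;
  return kMsToRtpTimestamp * static_cast<uint32_t>(ntp_time_ms);
}

int main() {
  // Two frames captured 33 ms apart (roughly 30 fps) differ by
  // 33 * 90 = 2970 RTP ticks.
  int64_t t0 = 1700000000000;  // some NTP time in ms
  uint32_t r0 = NtpMsToRtpTimestamp(t0);
  uint32_t r1 = NtpMsToRtpTimestamp(t0 + 33);
  printf("delta = %u ticks\n", r1 - r0);  // prints: delta = 2970 ticks
  return 0;
}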