Note that this starter code is tested only against the latest version; minor changes may be required to use it with older versions because of data format changes (e.g., the video_id vs. id field, the feature names, and the number of classes in each version).
The code for feature extraction can be found in the MediaPipe GitHub YouTube8M example. It extracts both audio and visual features from videos in a single graph.
context: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels"  # video-level labels.
    value: {
      int64_list: {
        value: [ 441, 525 ]
      }
    }
  }
  feature: {
    key: "segment_start_times"
    value: {
      int64_list: {
        value: [ 40, 30, 50, 65, 90 ]
      }
    }
  }
  feature: {
    key: "segment_end_times"
    value: {
      int64_list: {
        value: [ 45, 35, 55, 70, 95 ]
      }
    }
  }
  feature: {
    key: "segment_labels"
    value: {
      int64_list: {
        value: [ 525, 525, 525, 525, 525 ]
      }
    }
  }
  feature: {
    key: "segment_scores"
    value: {
      float_list: {
        value: [ 0.0, 0.0, 0.0, 0.0, 1.0 ]
      }
    }
  }
}
feature_lists: {
  # See the frame-level features section.
}
context: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels"
    value: {
      int64_list: {
        value: [1, 522, 11, 172]  # label list
      }
    }
  }
}
feature_lists: {
  feature_list: {
    key  : "rgb"
    value: {
      feature: {
        bytes_list: {
          value: [1024 8bit quantized features]
        }
      }
      feature: {
        bytes_list: {
          value: [1024 8bit quantized features]
        }
      }
      ...  # Repeated for every second, up to 300
    }
  }
  feature_list: {
    key  : "audio"
    value: {
      feature: {
        bytes_list: {
          value: [128 8bit quantized features]
        }
      }
      feature: {
        bytes_list: {
          value: [128 8bit quantized features]
        }
      }
      ...  # Repeated for every second, up to 300
    }
  }
}
features: {
  feature: {
    key  : "id"
    value: {
      bytes_list: {
        value: (Video id)
      }
    }
  }
  feature: {
    key  : "labels"
    value: {
      int64_list: {
        value: [1, 522, 11, 172]  # label list
      }
    }
  }
  feature: {
    # Average of all 'rgb' features for the video
    key  : "mean_rgb"
    value: {
      float_list: {
        value: [1024 float features]
      }
    }
  }
  feature: {
    # Average of all 'audio' features for the video
    key  : "mean_audio"
    value: {
      float_list: {
        value: [128 float features]
      }
    }
  }
}