In Part 2 of the Google Video Intelligence API and Python series, we will learn how to use the API by writing our first Python script to detect different things in a video file.

The Video Intelligence API can detect and extract information from video footage. The LABEL DETECTION feature identifies objects, locations, activities, animal species, products, and more.

Buy Me a Coffee? Your support is much appreciated!
PayPal Me:
Venmo: @Jie-Jenn

Source Code:

import io
import os

import pandas as pd
from google.cloud import videointelligence

# Script: request label annotations for a video stored in Google Cloud Storage
# and print each label, its categories, and the segments where it appears.

# Tell the Google client library which service-account key file to use.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'GoogleCloudKey_MyServiceAcct.json'
video_client = videointelligence.VideoIntelligenceServiceClient()

# NOTE(review): `videointelligence.enums` and the `.nanos` offsets used below
# look like the pre-2.0 client-library API -- confirm the installed version.
features = [videointelligence.enums.Feature.LABEL_DETECTION]

gs_URI = 'gs://dummy_videos/logo clip.mp4'
# Pass the URI by keyword so it cannot be mistaken for another positional
# parameter of annotate_video.
operation = video_client.annotate_video(input_uri=gs_URI, features=features)
print('\nProcessing video for label annotations:')

# annotate_video returns an operation handle; wait up to 120 seconds for it.
result = operation.result(timeout=120)
annotation_results = result.annotation_results

# Only one video was submitted, so only the first result entry is read.
segment_labels = annotation_results[0].segment_label_annotations

for segment_label in segment_labels:
    print('Video label description: {}'.format(
        segment_label.entity.description))
    for category_entity in segment_label.category_entities:
        print('\tLabel category description: {}'.format(
            category_entity.description))

    # Each offset is split into whole seconds plus nanoseconds;
    # dividing nanos by 1e9 (nanoseconds per second) yields fractional seconds.
    for segment_index, segment in enumerate(segment_label.segments):
        start_offset = segment.segment.start_time_offset
        end_offset = segment.segment.end_time_offset
        start_time = start_offset.seconds + start_offset.nanos / 1e9
        end_time = end_offset.seconds + end_offset.nanos / 1e9
        positions = '{}s to {}s'.format(start_time, end_time)
        confidence = segment.confidence
        print('\tSegment {}: {}'.format(segment_index, positions))
        print('\tConfidence: {}'.format(confidence))