Check out Kite (free AI Coding Assistant) → Link



Buy Me a Coffee? Your support is much appreciated!

Source Code:  

 

import json
import azure.cognitiveservices.speech as speech

# Credentials for the Azure Speech resource (placeholders: fill in before running).
API_KEY = '<API_KEY>'
ENDPOINT = '<ENDPOINT>'

# Audio input: the recognizer below reads from this file.
media_file_path = '<audio file path>'
audio_config = speech.audio.AudioConfig(filename=media_file_path)

# Translation settings: speech is recognized as 'ja-JP' and 'en' is
# registered as a target language.
translation_config = speech.translation.SpeechTranslationConfig(
    endpoint=ENDPOINT,
    subscription=API_KEY,
)
translation_config.add_target_language('en')
translation_config.speech_recognition_language = 'ja-JP'

# Recognizer that ties the translation settings to the audio input.
recognizer = speech.translation.TranslationRecognizer(
    audio_config=audio_config,
    translation_config=translation_config,
)

# Run one recognition pass and inspect what came back.
# NOTE: the bare expressions below only display a value when executed
# interactively (REPL / notebook); in a plain script they are no-ops.
result = recognizer.recognize_once()
vars(result)

# translation status
result.reason

source_language_text = result.text

# The SDK reports durations in ticks of 100 nanoseconds, i.e. 10**7 ticks
# per second. (The previous divisor, pow(60, 4) == 12_960_000, does not
# correspond to any time unit.)
TICKS_PER_SECOND = 10 ** 7
duration = result.duration // TICKS_PER_SECOND  # whole seconds

# Translations are only read after a successful translation; a NoMatch or
# Canceled result is not expected to carry the 'en' entry or the
# 'Translation' section of the JSON payload, so guard on the reason first.
if result.reason == speech.ResultReason.TranslatedSpeech:
    result.translations['en']

    # Same information, taken from the raw JSON payload of the result.
    translation_json = json.loads(result.json)
    translation_json['RecognitionStatus']
    translation_json['Duration']
    translation_json['Text']
    for translated in translation_json['Translation']['Translations']:
        print(translated['Language'])
        print(translated['Text'])
        print()
else:
    print('No translation available:', result.reason)



# Recognize the audio repeatedly, collecting every translation until the
# recognizer reports cancellation.
recognizer = speech.translation.TranslationRecognizer(
    translation_config=translation_config, audio_config=audio_config)
outputs = []
while True:
    result = recognizer.recognize_once()

    # Check the result that was just returned (not the one from the previous
    # pass): a Canceled result ends the loop before its payload is parsed.
    if result.reason == speech.ResultReason.Canceled:
        break

    # Anything other than a successful translation (e.g. NoMatch) is skipped,
    # since it is not expected to carry a 'Translation' section.
    if result.reason != speech.ResultReason.TranslatedSpeech:
        continue

    translation_json = json.loads(result.json)
    for translated in translation_json['Translation']['Translations']:
        print(translated['Language'])
        print(translated['Text'])
        outputs.append({'language': translated['Language'],  'text': translated['Text']})

print(outputs)