package org.springframework.ai.azure.openai;

import com.azure.ai.openai.OpenAIClient;
import com.azure.ai.openai.models.AudioTranscriptionFormat;
import com.azure.ai.openai.models.AudioTranscriptionOptions;
import com.azure.core.http.rest.RequestOptions;
import java.io.IOException;
import java.io.InputStream;
import java.time.Duration;
import java.util.List;
import org.springframework.ai.audio.transcription.AudioTranscription;
import org.springframework.ai.audio.transcription.AudioTranscriptionPrompt;
import org.springframework.ai.audio.transcription.AudioTranscriptionResponse;
import org.springframework.ai.azure.openai.AzureOpenAiAudioTranscriptionOptions;
import org.springframework.ai.azure.openai.metadata.AzureOpenAiAudioTranscriptionResponseMetadata;
import org.springframework.ai.model.Model;
import org.springframework.ai.model.ModelOptionsUtils;
import org.springframework.core.io.Resource;
import org.springframework.util.Assert;
import org.springframework.util.StringUtils;

/* loaded from: input_file:org/springframework/ai/azure/openai/AzureOpenAiAudioTranscriptionModel.class */
public class AzureOpenAiAudioTranscriptionModel implements Model<AudioTranscriptionPrompt, AudioTranscriptionResponse> {
    private static final List<AudioTranscriptionFormat> JSON_FORMATS = List.of(AudioTranscriptionFormat.JSON, AudioTranscriptionFormat.VERBOSE_JSON);
    private static final String FILENAME_MARKER = "filename.wav";
    private final OpenAIClient openAIClient;
    private final AzureOpenAiAudioTranscriptionOptions defaultOptions;

    public AzureOpenAiAudioTranscriptionModel(OpenAIClient openAIClient, AzureOpenAiAudioTranscriptionOptions azureOpenAiAudioTranscriptionOptions) {
        this.openAIClient = openAIClient;
        this.defaultOptions = azureOpenAiAudioTranscriptionOptions;
    }

    private static byte[] toBytes(Resource resource) {
        try {
            return resource.getInputStream().readAllBytes();
        } catch (IOException e) {
            throw new IllegalArgumentException("Failed to read resource: " + String.valueOf(resource), e);
        }
    }

    public String call(Resource resource) {
        return call(new AudioTranscriptionPrompt(resource)).getResult().getOutput();
    }

    public AudioTranscriptionResponse call(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        String deploymentName = getDeploymentName(audioTranscriptionPrompt);
        AudioTranscriptionOptions audioTranscriptionOptions = toAudioTranscriptionOptions(audioTranscriptionPrompt);
        if (!JSON_FORMATS.contains(audioTranscriptionOptions.getResponseFormat())) {
            String str = (String) this.openAIClient.getAudioTranscriptionTextWithResponse(deploymentName, FILENAME_MARKER, audioTranscriptionOptions, (RequestOptions) null).getValue();
            return new AudioTranscriptionResponse(new AudioTranscription(str), AzureOpenAiAudioTranscriptionResponseMetadata.from(str));
        }
        com.azure.ai.openai.models.AudioTranscription audioTranscription = this.openAIClient.getAudioTranscription(deploymentName, FILENAME_MARKER, audioTranscriptionOptions);
        List list = null;
        if (audioTranscription.getWords() != null) {
            list = audioTranscription.getWords().stream().map(audioTranscriptionWord -> {
                return new AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Word(audioTranscriptionWord.getWord(), Float.valueOf((float) audioTranscriptionWord.getStart().toSeconds()), Float.valueOf((float) audioTranscriptionWord.getEnd().toSeconds()));
            }).toList();
        }
        List list2 = null;
        if (audioTranscription.getSegments() != null) {
            list2 = audioTranscription.getSegments().stream().map(audioTranscriptionSegment -> {
                return new AzureOpenAiAudioTranscriptionOptions.StructuredResponse.Segment(Integer.valueOf(audioTranscriptionSegment.getId()), Integer.valueOf(audioTranscriptionSegment.getSeek()), Float.valueOf((float) audioTranscriptionSegment.getStart().toSeconds()), Float.valueOf((float) audioTranscriptionSegment.getEnd().toSeconds()), audioTranscriptionSegment.getText(), audioTranscriptionSegment.getTokens(), Float.valueOf((float) audioTranscriptionSegment.getTemperature()), Float.valueOf((float) audioTranscriptionSegment.getAvgLogprob()), Float.valueOf((float) audioTranscriptionSegment.getCompressionRatio()), Float.valueOf((float) audioTranscriptionSegment.getNoSpeechProb()));
            }).toList();
        }
        AzureOpenAiAudioTranscriptionOptions.StructuredResponse structuredResponse = new AzureOpenAiAudioTranscriptionOptions.StructuredResponse(audioTranscription.getLanguage(), audioTranscription.getDuration() == null ? null : Float.valueOf((float) audioTranscription.getDuration().toSeconds()), audioTranscription.getText(), list, list2);
        return new AudioTranscriptionResponse(new AudioTranscription(structuredResponse.text()), AzureOpenAiAudioTranscriptionResponseMetadata.from(structuredResponse));
    }

    private String getDeploymentName(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        org.springframework.ai.audio.transcription.AudioTranscriptionOptions options = audioTranscriptionPrompt.getOptions();
        if (this.defaultOptions != null) {
            options = (org.springframework.ai.audio.transcription.AudioTranscriptionOptions) ModelOptionsUtils.merge(options, this.defaultOptions, AzureOpenAiAudioTranscriptionOptions.class);
        }
        if (options instanceof AzureOpenAiAudioTranscriptionOptions) {
            String deploymentName = ((AzureOpenAiAudioTranscriptionOptions) options).getDeploymentName();
            if (StringUtils.hasText(deploymentName)) {
                return deploymentName;
            }
        }
        return options.getModel();
    }

    private AudioTranscriptionOptions toAudioTranscriptionOptions(AudioTranscriptionPrompt audioTranscriptionPrompt) {
        org.springframework.ai.audio.transcription.AudioTranscriptionOptions options = audioTranscriptionPrompt.getOptions();
        if (this.defaultOptions != null) {
            options = (org.springframework.ai.audio.transcription.AudioTranscriptionOptions) ModelOptionsUtils.merge(options, this.defaultOptions, AzureOpenAiAudioTranscriptionOptions.class);
        }
        AudioTranscriptionOptions audioTranscriptionOptions = new AudioTranscriptionOptions(toBytes(audioTranscriptionPrompt.getInstructions()));
        if (options instanceof AzureOpenAiAudioTranscriptionOptions) {
            AzureOpenAiAudioTranscriptionOptions azureOpenAiAudioTranscriptionOptions = (AzureOpenAiAudioTranscriptionOptions) options;
            String model = azureOpenAiAudioTranscriptionOptions.getModel();
            if (StringUtils.hasText(model)) {
                audioTranscriptionOptions.setModel(model);
            }
            String language = azureOpenAiAudioTranscriptionOptions.getLanguage();
            if (StringUtils.hasText(language)) {
                audioTranscriptionOptions.setLanguage(language);
            }
            String prompt = azureOpenAiAudioTranscriptionOptions.getPrompt();
            if (StringUtils.hasText(prompt)) {
                audioTranscriptionOptions.setPrompt(prompt);
            }
            Float temperature = azureOpenAiAudioTranscriptionOptions.getTemperature();
            if (temperature != null) {
                audioTranscriptionOptions.setTemperature(Double.valueOf(temperature.doubleValue()));
            }
            AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat responseFormat = azureOpenAiAudioTranscriptionOptions.getResponseFormat();
            List<AzureOpenAiAudioTranscriptionOptions.GranularityType> granularityType = azureOpenAiAudioTranscriptionOptions.getGranularityType();
            if (responseFormat != null) {
                audioTranscriptionOptions.setResponseFormat(responseFormat.getValue());
                if (responseFormat == AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat.VERBOSE_JSON && granularityType == null) {
                    granularityType = List.of(AzureOpenAiAudioTranscriptionOptions.GranularityType.SEGMENT);
                }
            }
            if (granularityType != null) {
                Assert.isTrue(responseFormat == AzureOpenAiAudioTranscriptionOptions.TranscriptResponseFormat.VERBOSE_JSON, "response_format must be set to verbose_json to use timestamp granularities.");
                audioTranscriptionOptions.setTimestampGranularities(granularityType.stream().map((v0) -> {
                    return v0.getValue();
                }).toList());
            }
        }
        return audioTranscriptionOptions;
    }
}
