package org.apache.flink.formats.parquet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import javax.annotation.Nullable;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.io.CheckpointableInputFormat;
import org.apache.flink.api.common.io.FileInputFormat;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileInputSplit;
import org.apache.flink.core.fs.Path;
import org.apache.flink.formats.parquet.utils.ParquetRecordReader;
import org.apache.flink.formats.parquet.utils.ParquetSchemaConverter;
import org.apache.flink.formats.parquet.utils.RowReadSupport;
import org.apache.flink.metrics.Counter;
import org.apache.flink.types.Row;
import org.apache.flink.util.Preconditions;
import org.apache.parquet.ParquetReadOptions;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/flink/formats/parquet/ParquetInputFormat.class */
/**
 * Base input format that reads Parquet files as {@link Row} records through a
 * {@link ParquetRecordReader} and hands every row to {@link #convert(Row)} to produce the
 * element type {@code E}.
 *
 * <p>The set of fields to read is taken from the expected schema passed to the constructor,
 * from {@link #selectFields(String[])}, or, if neither is available, from the schema of the
 * file that is opened. When a file does not provide a requested field with the expected type,
 * the split is either rejected with an {@link IllegalArgumentException} or silently skipped,
 * depending on {@link #PARQUET_SKIP_WRONG_SCHEMA_SPLITS}.
 *
 * @param <E> type of the records produced by this format
 */
public abstract class ParquetInputFormat<E> extends FileInputFormat<E> implements CheckpointableInputFormat<FileInputSplit, Tuple2<Long, Long>> {
    /** Configuration key: skip (instead of fail on) splits whose file schema does not match. */
    public static final String PARQUET_SKIP_WRONG_SCHEMA_SPLITS = "skip.splits.wrong.schema";

    /** Configuration key: value forwarded to {@code ParquetRecordReader#setSkipCorruptedRecord}. */
    public static final String PARQUET_SKIP_CORRUPTED_RECORD = "skip.corrupted.record";

    private static final long serialVersionUID = 1;
    private static final Logger LOG = LoggerFactory.getLogger(ParquetInputFormat.class);

    private boolean skipWrongSchemaFileSplit;
    private boolean skipCorruptedRecord;

    /** Set by {@link #getReadSchema} when the split currently being opened must be skipped. */
    private boolean skipThisSplit;

    @Nullable
    private TypeInformation[] fieldTypes;

    @Nullable
    private String[] fieldNames;
    private FilterPredicate filterPredicate;
    private transient Counter recordConsumed;

    @Nullable
    private transient MessageType expectedFileSchema;
    private transient ParquetRecordReader<Row> parquetRecordReader;

    /**
     * Creates the format for the given path.
     *
     * @param path file or directory to read
     * @param messageType expected Parquet schema, or {@code null} to derive field names and
     *     types from the file schema (or from {@link #selectFields(String[])}) when a split is
     *     opened
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public ParquetInputFormat(Path path, MessageType messageType) {
        super(path);
        this.expectedFileSchema = messageType;
        if (messageType != null) {
            RowTypeInfo expectedRowType = ParquetSchemaConverter.fromParquetType(messageType);
            this.fieldTypes = expectedRowType.getFieldTypes();
            this.fieldNames = expectedRowType.getFieldNames();
        }
        this.unsplittable = true;
    }

    /**
     * Applies the base-class configuration and then reads the two skip options. A flag that is
     * already {@code true} is kept; otherwise its value is taken from {@code configuration}
     * (default {@code false}).
     *
     * @param configuration configuration carrying {@link #PARQUET_SKIP_WRONG_SCHEMA_SPLITS} and
     *     {@link #PARQUET_SKIP_CORRUPTED_RECORD}
     */
    public void configure(Configuration configuration) {
        super.configure(configuration);
        if (!this.skipWrongSchemaFileSplit) {
            this.skipWrongSchemaFileSplit = configuration.getBoolean(PARQUET_SKIP_WRONG_SCHEMA_SPLITS, false);
        }
        // The guard used to be inverted, so this option was never read from the configuration.
        if (!this.skipCorruptedRecord) {
            this.skipCorruptedRecord = configuration.getBoolean(PARQUET_SKIP_CORRUPTED_RECORD, false);
        }
    }

    /**
     * Restricts reading to the given fields. If an expected schema is known, the field types are
     * resolved immediately; otherwise they are resolved against the file schema in
     * {@link #open(FileInputSplit)}.
     *
     * @param strArr names of the fields to read; must not be {@code null}
     * @throws IllegalArgumentException if an expected schema is known and does not contain one
     *     of the fields
     */
    public void selectFields(String[] strArr) {
        Preconditions.checkNotNull(strArr, "fieldNames");
        this.fieldNames = strArr;
        if (this.expectedFileSchema != null) {
            this.fieldTypes = getFieldTypesFromSchema(strArr, this.expectedFileSchema);
        }
    }

    /**
     * Looks up the type of every selected field in the given schema.
     *
     * @throws IllegalArgumentException if the lookup of a field fails with an
     *     {@link IndexOutOfBoundsException}
     */
    private TypeInformation<?>[] getFieldTypesFromSchema(String[] selectedNames, MessageType schema) {
        RowTypeInfo schemaRowType = ParquetSchemaConverter.fromParquetType(schema);
        TypeInformation<?>[] selectedTypes = new TypeInformation<?>[selectedNames.length];
        for (int i = 0; i < selectedNames.length; i++) {
            try {
                selectedTypes[i] = schemaRowType.getTypeAt(selectedNames[i]);
            } catch (IndexOutOfBoundsException e) {
                throw new IllegalArgumentException(String.format("Fail to access Field %s , which is not contained in the file schema", selectedNames[i]), e);
            }
        }
        return selectedTypes;
    }

    /**
     * Sets the predicate used to filter records while reading.
     *
     * @param filterPredicate predicate to apply, or {@code null} for no filtering
     */
    public void setFilterPredicate(FilterPredicate filterPredicate) {
        this.filterPredicate = filterPredicate;
    }

    /**
     * Returns the current read position reported by the record reader.
     *
     * <p>NOTE(review): the method name is a decompiler rename of {@code getCurrentState}; it is
     * kept unchanged here so existing references keep resolving.
     */
    /* renamed from: getCurrentState, reason: merged with bridge method [inline-methods] */
    public Tuple2<Long, Long> m832getCurrentState() {
        return this.parquetRecordReader.getCurrentReadPosition();
    }

    /**
     * Opens the Parquet file of the given split and prepares a record reader for it.
     *
     * <p>If no expected schema was given, field names and/or types are derived from the file
     * schema first. If the file schema does not satisfy the requested fields and skipping is
     * enabled, the split is marked as skipped, the file is closed again and no record reader is
     * created.
     *
     * @param fileInputSplit split to open
     * @throws IOException if the file cannot be opened or the reader cannot be initialized
     * @throws IllegalArgumentException if the file schema does not match and skipping is disabled
     */
    @Override // 
    public void open(FileInputSplit fileInputSplit) throws IOException {
        // Reset the per-split flag; getReadSchema() raises it again if this split must be skipped.
        this.skipThisSplit = false;
        org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration();
        org.apache.hadoop.fs.Path hadoopPath = new org.apache.hadoop.fs.Path(fileInputSplit.getPath().toUri());
        ParquetFileReader fileReader = new ParquetFileReader(HadoopInputFile.fromPath(hadoopPath, hadoopConf), ParquetReadOptions.builder().build());
        try {
            MessageType fileSchema = fileReader.getFileMetaData().getSchema();
            if (this.expectedFileSchema == null) {
                if (this.fieldNames == null) {
                    RowTypeInfo fileRowType = ParquetSchemaConverter.fromParquetType(fileSchema);
                    this.fieldNames = fileRowType.getFieldNames();
                    this.fieldTypes = fileRowType.getFieldTypes();
                } else {
                    this.fieldTypes = getFieldTypesFromSchema(this.fieldNames, fileSchema);
                }
            }
            MessageType readSchema = getReadSchema(fileSchema, fileInputSplit.getPath());
            if (!this.skipThisSplit) {
                FilterCompat.Filter filter = this.filterPredicate == null ? FilterCompat.NOOP : FilterCompat.get(this.filterPredicate);
                ParquetRecordReader<Row> recordReader = new ParquetRecordReader<>(new RowReadSupport(), readSchema, filter);
                recordReader.initialize(fileReader, hadoopConf);
                recordReader.setSkipCorruptedRecord(this.skipCorruptedRecord);
                // From here on the file reader is reachable through the record reader and is
                // released via close() (presumably ParquetRecordReader#close closes it -- confirm).
                this.parquetRecordReader = recordReader;
            }
        } catch (IOException | RuntimeException failure) {
            // Nobody else holds the file reader yet, so release it before propagating.
            try {
                fileReader.close();
            } catch (IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
        if (this.skipThisSplit) {
            LOG.warn("Escaped the file split [{}] due to mismatch of file schema to expected result schema", fileInputSplit.getPath());
            // No record reader was created for this split, so the file must be closed here.
            fileReader.close();
            return;
        }
        if (this.recordConsumed == null) {
            this.recordConsumed = getRuntimeContext().getMetricGroup().counter("parquet-records-consumed");
        }
        LOG.debug("Open ParquetInputFormat with FileInputSplit [{}]", fileInputSplit.getPath());
    }

    /**
     * Opens the split again and seeks the record reader to a previously captured position.
     * If the split is skipped because of a schema mismatch there is no reader to position and
     * the state is ignored.
     *
     * @param fileInputSplit split to reopen; must not be {@code null}
     * @param tuple2 read position as returned by {@link #m832getCurrentState()}; must not be
     *     {@code null}
     * @throws IOException if opening the split or seeking fails
     */
    public void reopen(FileInputSplit fileInputSplit, Tuple2<Long, Long> tuple2) throws IOException {
        Preconditions.checkNotNull(fileInputSplit, "reopen() cannot be called on a null split.");
        Preconditions.checkNotNull(tuple2, "reopen() cannot be called with a null initial state.");
        open(fileInputSplit);
        if (this.skipThisSplit) {
            // open() did not create a reader for this split; seeking would hit null or a stale reader.
            return;
        }
        this.parquetRecordReader.seek(tuple2.f0.longValue(), tuple2.f1.longValue());
    }

    /** Returns the names of the fields this format reads; may be {@code null} before a split is opened. */
    /* JADX INFO: Access modifiers changed from: protected */
    public String[] getFieldNames() {
        return this.fieldNames;
    }

    /** Returns the types of the fields this format reads; may be {@code null} before a split is opened. */
    /* JADX INFO: Access modifiers changed from: protected */
    public TypeInformation[] getFieldTypes() {
        return this.fieldTypes;
    }

    /** Returns the filter predicate set via {@link #setFilterPredicate(FilterPredicate)}, if any. */
    @VisibleForTesting
    protected FilterPredicate getPredicate() {
        return this.filterPredicate;
    }

    /**
     * Closes the record reader if one has been created.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        if (this.parquetRecordReader != null) {
            this.parquetRecordReader.close();
        }
    }

    /**
     * Reports whether the current split is exhausted. A skipped split is always at its end.
     *
     * @throws IOException if the record reader fails while checking
     */
    public boolean reachedEnd() throws IOException {
        if (this.skipThisSplit) {
            return true;
        }
        return this.parquetRecordReader.reachEnd();
    }

    /**
     * Reads the next row, counts it and converts it with {@link #convert(Row)}.
     *
     * @param e reuse object; not used by this implementation
     * @return the converted record, or {@code null} if the end of the split has been reached
     * @throws IOException if reading fails
     */
    public E nextRecord(E e) throws IOException {
        if (reachedEnd()) {
            return null;
        }
        this.recordConsumed.inc();
        return convert(this.parquetRecordReader.nextRecord());
    }

    /**
     * Converts a row read from the Parquet file into the record type of this format.
     *
     * @param row row produced by the record reader
     * @return the converted record
     */
    protected abstract E convert(Row row);

    /**
     * Builds the schema to read by projecting the file schema onto the selected fields.
     *
     * <p>If a selected field is missing from the file or has a different type, the method
     * throws unless skipping of wrong-schema splits is enabled; in that case it sets
     * {@code skipThisSplit} and returns the unmodified file schema.
     *
     * @param fileSchema schema found in the Parquet file
     * @param path path of the file, used in error messages only
     * @return the projected schema, or {@code fileSchema} itself if the split is to be skipped
     * @throws IllegalArgumentException on a schema mismatch when skipping is disabled
     */
    private MessageType getReadSchema(MessageType fileSchema, Path path) {
        RowTypeInfo fileRowType = ParquetSchemaConverter.fromParquetType(fileSchema);
        List<Type> projectedFields = new ArrayList<>(this.fieldNames.length);
        for (int i = 0; i < this.fieldNames.length; i++) {
            String name = this.fieldNames[i];
            TypeInformation<?> expectedType = this.fieldTypes[i];
            if (fileRowType.getFieldIndex(name) < 0) {
                if (!this.skipWrongSchemaFileSplit) {
                    throw new IllegalArgumentException("Field " + name + " cannot be found in schema of  Parquet file: " + path + ".");
                }
                this.skipThisSplit = true;
                return fileSchema;
            }
            TypeInformation<?> actualType = fileRowType.getTypeAt(name);
            if (!expectedType.equals(actualType)) {
                if (!this.skipWrongSchemaFileSplit) {
                    throw new IllegalArgumentException("Expecting type " + expectedType + " for field " + name + " but found type " + actualType + " in Parquet file: " + path + ".");
                }
                this.skipThisSplit = true;
                return fileSchema;
            }
            projectedFields.add(fileSchema.getType(name));
        }
        return new MessageType(fileSchema.getName(), projectedFields);
    }
}
