package org.apache.hudi.utilities.sources;

import java.util.Collections;
import org.apache.hudi.DataSourceReadOptions;
import org.apache.hudi.DataSourceUtils;
import org.apache.hudi.common.config.TypedProperties;
import org.apache.hudi.common.model.HoodieRecord;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import org.apache.hudi.hive.SlashEncodedDayPartitionValueExtractor;
import org.apache.hudi.utilities.schema.SchemaProvider;
import org.apache.hudi.utilities.sources.helpers.IncrSourceHelper;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

/**
 * {@link RowSource} that reads its next batch from a Hudi table through the
 * {@code org.apache.hudi} Spark data source.
 *
 * <p>The instant range to read, and whether to read it with an incremental or a snapshot
 * query, are decided by {@link IncrSourceHelper#calculateBeginAndEndInstants}; this class only
 * translates that decision into Spark reader options and strips Hudi meta columns from the
 * result.
 */
public class HoodieIncrSource extends RowSource {
    private static final Logger LOG = LogManager.getLogger(HoodieIncrSource.class);

    /** Spark data source format used for every read issued by this source. */
    private static final String HUDI_FORMAT = "org.apache.hudi";

    /**
     * Property keys and default values understood by {@link HoodieIncrSource}.
     */
    public static class Config {
        /** Key of the (required) path that is handed to the Spark reader's {@code load(...)}. */
        static final String HOODIE_SRC_BASE_PATH = "hoodie.deltastreamer.source.hoodieincr.path";

        /**
         * Key of the instant count passed to {@link IncrSourceHelper#calculateBeginAndEndInstants};
         * falls back to {@link #DEFAULT_NUM_INSTANTS_PER_FETCH} when unset.
         */
        static final String NUM_INSTANTS_PER_FETCH = "hoodie.deltastreamer.source.hoodieincr.num_instants";

        /** Not referenced by the code in this class. */
        static final String HOODIE_SRC_PARTITION_FIELDS = "hoodie.deltastreamer.source.hoodieincr.partition.fields";

        /** Not referenced by the code in this class. */
        static final String HOODIE_SRC_PARTITION_EXTRACTORCLASS = "hoodie.deltastreamer.source.hoodieincr.partition.extractor.class";

        /**
         * Boolean key; when {@code true} the missing-checkpoint strategy is forced to
         * {@code READ_LATEST}, overriding whatever {@link #MISSING_CHECKPOINT_STRATEGY} holds.
         *
         * @deprecated presumably superseded by {@link #MISSING_CHECKPOINT_STRATEGY} - confirm
         *     against the project documentation.
         */
        @Deprecated
        public static final String READ_LATEST_INSTANT_ON_MISSING_CKPT = "hoodie.deltastreamer.source.hoodieincr.read_latest_on_missing_ckpt";

        /**
         * Key whose value, when present, is parsed with
         * {@code IncrSourceHelper.MissingCheckpointStrategy.valueOf(...)}.
         */
        public static final String MISSING_CHECKPOINT_STRATEGY = "hoodie.deltastreamer.source.hoodieincr.missing.checkpoint.strategy";

        /** Not referenced by the code in this class. */
        static final String SOURCE_FILE_FORMAT = "hoodie.deltastreamer.source.hoodieincr.file.format";

        /** Not referenced by the code in this class. */
        static final String DEFAULT_SOURCE_FILE_FORMAT = "parquet";

        /**
         * Boolean key; when {@code true} every column in {@code HoodieRecord.HOODIE_META_COLUMNS}
         * is dropped from the batch, otherwise all of them except the partition-path column.
         */
        static final String HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE = "hoodie.deltastreamer.source.hoodieincr.drop.all.meta.fields.from.source";

        /** Default for {@link #NUM_INSTANTS_PER_FETCH}. */
        static final Integer DEFAULT_NUM_INSTANTS_PER_FETCH = 5;

        /** Not referenced by the code in this class. */
        static final String DEFAULT_HOODIE_SRC_PARTITION_EXTRACTORCLASS = SlashEncodedDayPartitionValueExtractor.class.getCanonicalName();

        /** Default for {@link #READ_LATEST_INSTANT_ON_MISSING_CKPT}. */
        public static final Boolean DEFAULT_READ_LATEST_INSTANT_ON_MISSING_CKPT = false;

        /** Default for {@link #HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE}. */
        public static final Boolean DEFAULT_HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE = false;
    }

    /**
     * Creates the source; all arguments are forwarded unchanged to {@link RowSource}.
     */
    public HoodieIncrSource(TypedProperties typedProperties, JavaSparkContext javaSparkContext, SparkSession sparkSession, SchemaProvider schemaProvider) {
        super(typedProperties, javaSparkContext, sparkSession, schemaProvider);
    }

    /**
     * Reads the next range of instants from the configured Hudi table.
     *
     * @param option last checkpoint; an absent or empty value is treated as "no begin instant"
     * @param j not used by this implementation
     * @return a pair whose left side is the batch (empty when the computed begin and end
     *     instants are equal) and whose right side is the end instant of the computed range
     */
    @Override // org.apache.hudi.utilities.sources.RowSource
    public Pair<Option<Dataset<Row>>, String> fetchNextBatch(Option<String> option, long j) {
        DataSourceUtils.checkRequiredProperties(this.props, Collections.singletonList(Config.HOODIE_SRC_BASE_PATH));
        final String srcPath = this.props.getString(Config.HOODIE_SRC_BASE_PATH);
        final int numInstantsPerFetch = this.props.getInteger(Config.NUM_INSTANTS_PER_FETCH, Config.DEFAULT_NUM_INSTANTS_PER_FETCH);
        final IncrSourceHelper.MissingCheckpointStrategy missingCheckpointStrategy = resolveMissingCheckpointStrategy();

        // Only a present, non-empty checkpoint is used as the begin instant.
        final Option<String> beginInstant = option.isPresent() && !option.get().isEmpty() ? option : Option.<String>empty();

        final Pair<String, Pair<String, String>> queryTypeAndRange = IncrSourceHelper.calculateBeginAndEndInstants(
                this.sparkContext, srcPath, numInstantsPerFetch, beginInstant, missingCheckpointStrategy);
        final String queryType = queryTypeAndRange.getLeft();
        final Pair<String, String> instantRange = queryTypeAndRange.getRight();

        // Begin == end means there is nothing new to read.
        if (instantRange.getLeft().equals(instantRange.getRight())) {
            LOG.warn("Already caught up. Begin Checkpoint was :" + instantRange.getLeft());
            return Pair.of(Option.empty(), instantRange.getLeft());
        }

        final Dataset<Row> rawBatch = queryType.equals(DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
                ? readIncremental(srcPath, instantRange)
                : readSnapshotBetween(srcPath, instantRange);
        final Dataset<Row> batch = rawBatch.drop(metaColumnsToDrop());
        return Pair.of(Option.of(batch), instantRange.getRight());
    }

    /**
     * Determines the missing-checkpoint strategy from the properties: the explicitly configured
     * value (or {@code null} when the key is absent), overridden by {@code READ_LATEST} when the
     * deprecated boolean flag is set.
     */
    private IncrSourceHelper.MissingCheckpointStrategy resolveMissingCheckpointStrategy() {
        final boolean readLatestOnMissingCkpt = this.props.getBoolean(
                Config.READ_LATEST_INSTANT_ON_MISSING_CKPT, Config.DEFAULT_READ_LATEST_INSTANT_ON_MISSING_CKPT);
        // Parsed even when the flag wins, so an invalid strategy value still fails here.
        final IncrSourceHelper.MissingCheckpointStrategy configured = this.props.containsKey(Config.MISSING_CHECKPOINT_STRATEGY)
                ? IncrSourceHelper.MissingCheckpointStrategy.valueOf(this.props.getString(Config.MISSING_CHECKPOINT_STRATEGY))
                : null;
        return readLatestOnMissingCkpt ? IncrSourceHelper.MissingCheckpointStrategy.READ_LATEST : configured;
    }

    /** Issues an incremental query bounded by the begin (left) and end (right) instants. */
    private Dataset<Row> readIncremental(String srcPath, Pair<String, String> instantRange) {
        final String fallbackKey = DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().key();
        return this.sparkSession.read().format(HUDI_FORMAT)
                .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_INCREMENTAL_OPT_VAL())
                .option(DataSourceReadOptions.BEGIN_INSTANTTIME().key(), instantRange.getLeft())
                .option(DataSourceReadOptions.END_INSTANTTIME().key(), instantRange.getRight())
                // The fallback setting is taken from the source properties, defaulting to the reader's own default.
                .option(fallbackKey, this.props.getString(fallbackKey,
                        DataSourceReadOptions.INCREMENTAL_FALLBACK_TO_FULL_TABLE_SCAN_FOR_NON_EXISTING_FILES().defaultValue()))
                .load(srcPath);
    }

    /**
     * Issues a snapshot query and keeps only rows whose commit time lies in
     * (begin instant, end instant].
     */
    private Dataset<Row> readSnapshotBetween(String srcPath, Pair<String, String> instantRange) {
        return this.sparkSession.read().format(HUDI_FORMAT)
                .option(DataSourceReadOptions.QUERY_TYPE().key(), DataSourceReadOptions.QUERY_TYPE_SNAPSHOT_OPT_VAL())
                .load(srcPath)
                .filter(String.format("%s > '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantRange.getLeft()))
                .filter(String.format("%s <= '%s'", HoodieRecord.COMMIT_TIME_METADATA_FIELD, instantRange.getRight()));
    }

    /**
     * Returns the meta columns to remove from the batch: all of them when
     * {@link Config#HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE} is {@code true}, otherwise all
     * except the partition-path column.
     */
    private String[] metaColumnsToDrop() {
        final boolean dropAllMetaFields = this.props.getBoolean(
                Config.HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE, Config.DEFAULT_HOODIE_DROP_ALL_META_FIELDS_FROM_SOURCE);
        if (dropAllMetaFields) {
            return HoodieRecord.HOODIE_META_COLUMNS.stream().toArray(String[]::new);
        }
        return HoodieRecord.HOODIE_META_COLUMNS.stream()
                .filter(column -> !column.equals(HoodieRecord.PARTITION_PATH_METADATA_FIELD))
                .toArray(String[]::new);
    }
}
