package org.apache.parquet.hadoop;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import net.openhft.hashing.LongHashFunction;
import org.apache.commons.lang3.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.values.bloomfilter.BloomFilter;
import org.apache.parquet.crypto.SingleRow;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.example.GroupReadSupport;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import org.apache.parquet.hadoop.util.HadoopInputFile;
import org.apache.parquet.hadoop.util.HadoopOutputFile;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.io.PositionOutputStream;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.GroupType;
import org.apache.parquet.schema.InvalidSchemaException;
import org.apache.parquet.schema.LogicalTypeAnnotation;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.Type;
import org.apache.parquet.schema.Types;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

/* loaded from: input_file:org/apache/parquet/hadoop/TestParquetWriter.class */
public class TestParquetWriter {

    /** JUnit rule supplying the temporary folder from which the tests obtain scratch file paths. */
    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    /* loaded from: input_file:org/apache/parquet/hadoop/TestParquetWriter$TestOutputFile.class */
    private static class TestOutputFile implements OutputFile {
        private final OutputFile outputFile;

        TestOutputFile(Path path, Configuration configuration) throws IOException {
            this.outputFile = HadoopOutputFile.fromPath(path, configuration);
        }

        public PositionOutputStream create(long j) throws IOException {
            return this.outputFile.create(j);
        }

        public PositionOutputStream createOrOverwrite(long j) throws IOException {
            return this.outputFile.createOrOverwrite(j);
        }

        public boolean supportsBlockSize() {
            return this.outputFile.supportsBlockSize();
        }

        public long defaultBlockSize() {
            return this.outputFile.defaultBlockSize();
        }
    }

    /**
     * Round-trips 1000 rows through {@link ParquetWriter} and {@link ParquetReader} for every
     * writer version, once with 10 and once with 1000 distinct values in {@code binary_field}.
     *
     * <p>After each round trip the footer is inspected to confirm that {@code binary_field} was
     * written with the encoding expected for that cardinality/version pair and that the
     * {@code writer.model.name} metadata entry is {@code "example"}.
     *
     * @throws Exception if writing, reading, or footer parsing fails
     */
    @Test
    public void test() throws Exception {
        Configuration conf = new Configuration();
        Path root = new Path("target/tests/TestParquetWriter/");
        TestUtils.enforceEmptyDir(conf, root);
        MessageType schema = MessageTypeParser.parseMessageType("message test { required binary binary_field; required int32 int32_field; required int64 int64_field; required boolean boolean_field; required float float_field; required double double_field; required fixed_len_byte_array(3) flba_field; required int96 int96_field; } ");
        GroupWriteSupport.setSchema(schema, conf);
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

        // Keyed by "<distinct value count>-<writer version>".
        Map<String, Encoding> expectedEncodings = new HashMap<>();
        expectedEncodings.put("10-" + ParquetProperties.WriterVersion.PARQUET_1_0, Encoding.PLAIN_DICTIONARY);
        expectedEncodings.put("1000-" + ParquetProperties.WriterVersion.PARQUET_1_0, Encoding.PLAIN);
        expectedEncodings.put("10-" + ParquetProperties.WriterVersion.PARQUET_2_0, Encoding.RLE_DICTIONARY);
        expectedEncodings.put("1000-" + ParquetProperties.WriterVersion.PARQUET_2_0, Encoding.DELTA_BYTE_ARRAY);

        for (int distinctValues : Arrays.asList(10, 1000)) {
            for (ParquetProperties.WriterVersion version : ParquetProperties.WriterVersion.values()) {
                Path file = new Path(root, version.name() + "_" + distinctValues);

                // try-with-resources so the writer is released even when a write fails.
                try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new TestOutputFile(file, conf))
                        .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
                        .withRowGroupSize(1024)
                        .withPageSize(1024)
                        .withDictionaryPageSize(512)
                        .enableDictionaryEncoding()
                        .withValidation(false)
                        .withWriterVersion(version)
                        .withConf(conf)
                        .build()) {
                    for (int i = 0; i < 1000; i++) {
                        writer.write(groupFactory.newGroup()
                                .append("binary_field", "test" + (i % distinctValues))
                                .append(SingleRow.INT32_FIELD_NAME, 32)
                                .append("int64_field", 64L)
                                .append(SingleRow.BOOLEAN_FIELD_NAME, true)
                                .append(SingleRow.FLOAT_FIELD_NAME, 1.0f)
                                .append(SingleRow.DOUBLE_FIELD_NAME, 2.0d)
                                .append(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, "foo")
                                .append("int96_field", Binary.fromConstantByteArray(new byte[12])));
                    }
                }

                // Likewise, a failed assertion must not leave the reader open.
                try (ParquetReader<Group> reader =
                        ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build()) {
                    for (int i = 0; i < 1000; i++) {
                        Group group = reader.read();
                        Assert.assertEquals("test" + (i % distinctValues), group.getBinary("binary_field", 0).toStringUsingUTF8());
                        Assert.assertEquals(32L, group.getInteger(SingleRow.INT32_FIELD_NAME, 0));
                        Assert.assertEquals(64L, group.getLong("int64_field", 0));
                        Assert.assertTrue(group.getBoolean(SingleRow.BOOLEAN_FIELD_NAME, 0));
                        Assert.assertEquals(1.0d, group.getFloat(SingleRow.FLOAT_FIELD_NAME, 0), 0.001d);
                        Assert.assertEquals(2.0d, group.getDouble(SingleRow.DOUBLE_FIELD_NAME, 0), 0.001d);
                        Assert.assertEquals("foo", group.getBinary(SingleRow.FIXED_LENGTH_BINARY_FIELD_NAME, 0).toStringUsingUTF8());
                        Assert.assertEquals(Binary.fromConstantByteArray(new byte[12]), group.getInt96("int96_field", 0));
                    }
                }

                ParquetMetadata footer = ParquetFileReader.readFooter(conf, file, ParquetMetadataConverter.NO_FILTER);
                for (BlockMetaData block : footer.getBlocks()) {
                    for (ColumnChunkMetaData column : block.getColumns()) {
                        if (column.getPath().toDotString().equals("binary_field")) {
                            String key = distinctValues + "-" + version;
                            Encoding expected = expectedEncodings.get(key);
                            Assert.assertTrue(key + ":" + column.getEncodings() + " should contain " + expected, column.getEncodings().contains(expected));
                        }
                    }
                }
                Assert.assertEquals("Object model property should be example", "example", footer.getFileMetaData().getKeyValueMetaData().get("writer.model.name"));
            }
        }
    }

    /**
     * Building a writer for a schema that contains a group without fields must fail with
     * {@link InvalidSchemaException} and must not leave a file behind.
     *
     * @throws IOException if the placeholder file cannot be created
     */
    @Test
    public void testBadWriteSchema() throws IOException {
        File target = this.temp.newFile("test.parquet");
        // Only the path is needed; drop the placeholder so its existence can be asserted on later.
        target.delete();

        TestUtils.assertThrows("Should reject a schema with an empty group", InvalidSchemaException.class, () -> {
            GroupType emptyGroup = new GroupType(Type.Repetition.REQUIRED, "invalid_group", new Type[0]);
            MessageType invalidSchema = Types.buildMessage().addField(emptyGroup).named("invalid_message");
            ExampleParquetWriter.builder(new Path(target.toString())).withType(invalidSchema).build();
            return null;
        });

        Assert.assertFalse("Should not create a file when schema is rejected", target.exists());
    }

    /**
     * Writes 100 copies of a group whose optional string list is never populated, with a page
     * row-count limit of 10, and checks that the same 100 records are read back.
     *
     * @throws IOException if the file cannot be written or read
     */
    @Test
    public void testNullValuesWithPageRowLimit() throws IOException {
        final int recordCount = 100;
        MessageType schema = Types.buildMessage()
                .optionalList()
                .optionalElement(PrimitiveType.PrimitiveTypeName.BINARY)
                .as(LogicalTypeAnnotation.stringType())
                .named("str_list")
                .named("msg");
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);
        // No value is ever appended, so the optional list stays unset in every record.
        Group emptyGroup = new SimpleGroupFactory(schema).newGroup();

        File file = this.temp.newFile();
        // The writer creates the file itself; only the unique path is needed.
        file.delete();
        Path path = new Path(file.getAbsolutePath());

        try (ParquetWriter<Group> writer =
                ExampleParquetWriter.builder(path).withPageRowCountLimit(10).withConf(conf).build()) {
            for (int i = 0; i < recordCount; i++) {
                writer.write(emptyGroup);
            }
        }

        try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), path).build()) {
            int readCount = 0;
            Group actual;
            while ((actual = reader.read()) != null) {
                Assert.assertEquals(emptyGroup.toString(), actual.toString());
                readCount++;
            }
            Assert.assertEquals("Number of written records should be equal to the read one", recordCount, readCount);
        }
    }

    /**
     * Writes four strings to a column with a Bloom filter enabled (dictionary encoding off) and
     * checks that the filter read back from the first row group reports every written value.
     *
     * @throws IOException if the file cannot be written or read
     */
    @Test
    public void testParquetFileWithBloomFilter() throws IOException {
        MessageType schema = Types.buildMessage()
                .required(PrimitiveType.PrimitiveTypeName.BINARY)
                .as(LogicalTypeAnnotation.stringType())
                .named("name")
                .named("msg");
        String[] names = {"hello", "parquet", "bloom", "filter"};
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

        File file = this.temp.newFile();
        // The writer creates the file itself; only the unique path is needed.
        file.delete();
        Path path = new Path(file.getAbsolutePath());

        // The writer is fully closed before the reader opens, so a reader failure cannot
        // trigger a second close of the writer.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
                .withPageRowCountLimit(10)
                .withConf(conf)
                .withDictionaryEncoding(false)
                .withBloomFilterEnabled("name", true)
                .build()) {
            for (String name : names) {
                writer.write(groupFactory.newGroup().append("name", name));
            }
        }

        try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
            BlockMetaData block = reader.getFooter().getBlocks().get(0);
            BloomFilter bloomFilter = reader.getBloomFilterDataReader(block).readBloomFilter(block.getColumns().get(0));
            for (String name : names) {
                long hash = LongHashFunction.xx(0L).hashBytes(Binary.fromString(name).toByteBuffer());
                Assert.assertTrue("Bloom filter should contain written value: " + name, bloomFilter.findHash(hash));
            }
        }
    }

    /**
     * For each configured false-positive probability, writes 100000 distinct random strings with
     * a Bloom filter enabled and probes the filter with 100000 distinct strings that were never
     * written. The number of hits must be positive and below the configured probability times
     * the probe count, with 10% headroom.
     *
     * <p>Written values are 12 characters long and probes are 10, so a probe can never equal a
     * written value and every hit is a genuine false positive. The written set is kept separate
     * from the probe set so every iteration writes the same data.
     *
     * @throws IOException if a file cannot be written or read
     */
    @Test
    public void testParquetFileWithBloomFilterWithFpp() throws IOException {
        final int distinctCount = 100000;
        final int writtenLength = 12;
        final int probeLength = writtenLength - 2;
        double[] fpps = {0.01d, 0.05d, 0.1d, 0.15d, 0.2d, 0.25d};

        Set<String> writtenValues = new HashSet<>();
        while (writtenValues.size() < distinctCount) {
            writtenValues.add(RandomStringUtils.randomAlphabetic(writtenLength));
        }

        MessageType schema = Types.buildMessage()
                .required(PrimitiveType.PrimitiveTypeName.BINARY)
                .as(LogicalTypeAnnotation.stringType())
                .named("name")
                .named("msg");
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);
        SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

        for (double fpp : fpps) {
            File file = this.temp.newFile();
            // The writer creates the file itself; only the unique path is needed.
            file.delete();
            Path path = new Path(file.getAbsolutePath());

            try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
                    .withPageRowCountLimit(10)
                    .withConf(conf)
                    .withDictionaryEncoding(false)
                    .withBloomFilterEnabled("name", true)
                    .withBloomFilterNDV("name", distinctCount)
                    .withBloomFilterFPP("name", fpp)
                    .build()) {
                for (String value : writtenValues) {
                    writer.write(groupFactory.newGroup().append("name", value));
                }
            }

            try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, new Configuration()))) {
                BlockMetaData block = reader.getFooter().getBlocks().get(0);
                BloomFilter bloomFilter = reader.getBloomFilterDataReader(block).readBloomFilter(block.getColumns().get(0));

                int falsePositives = 0;
                Set<String> probes = new HashSet<>();
                while (probes.size() < distinctCount) {
                    String probe = RandomStringUtils.randomAlphabetic(probeLength);
                    // Count each distinct probe once; duplicates are skipped by the add() check.
                    if (probes.add(probe)
                            && bloomFilter.findHash(LongHashFunction.xx(0L).hashBytes(Binary.fromString(probe).toByteBuffer()))) {
                        falsePositives++;
                    }
                }

                // Upper bound is the configured probability plus 10% headroom.
                double maxFalsePositives = distinctCount * (fpp * 1.1d);
                Assert.assertTrue(
                        "False positives " + falsePositives + " of " + distinctCount + " probes should be in (0, "
                                + maxFalsePositives + ") for fpp " + fpp,
                        falsePositives < maxFalsePositives && falsePositives > 0);
            }
        }
    }

    /**
     * Runs the block-count scenario with two page-size-check configurations: one expected to
     * produce a single row group for the three written rows, and one expected to produce three.
     *
     * @throws IOException if a file cannot be written or read
     */
    @Test
    public void testParquetFileWritesExpectedNumberOfBlocks() throws IOException {
        // NOTE(review): the max row count comes from an unrelated test class's FILE_SIZE constant;
        // presumably a stand-in for a ParquetProperties default - confirm the intended value.
        final int relaxedMinRowCount = 100;
        final int relaxedMaxRowCount = TestParquetWriterAppendBlocks.FILE_SIZE;
        testParquetFileNumberOfBlocks(relaxedMinRowCount, relaxedMaxRowCount, 1);

        // Min and max row count for the page size check are both 1: three row groups expected.
        final int strictRowCount = 1;
        testParquetFileNumberOfBlocks(strictRowCount, strictRowCount, 3);
    }

    /**
     * Writes three single-string rows with a row group size of 1 and asserts how many row groups
     * (blocks) end up in the file footer.
     *
     * @param minRowCountForPageSizeCheck value passed to {@code withMinRowCountForPageSizeCheck}
     * @param maxRowCountForPageSizeCheck value passed to {@code withMaxRowCountForPageSizeCheck}
     * @param expectedNumberOfBlocks number of blocks the footer is expected to list
     * @throws IOException if the file cannot be written or read
     */
    private void testParquetFileNumberOfBlocks(
            int minRowCountForPageSizeCheck, int maxRowCountForPageSizeCheck, int expectedNumberOfBlocks)
            throws IOException {
        MessageType schema = Types.buildMessage()
                .required(PrimitiveType.PrimitiveTypeName.BINARY)
                .as(LogicalTypeAnnotation.stringType())
                .named("str")
                .named("msg");
        Configuration conf = new Configuration();
        GroupWriteSupport.setSchema(schema, conf);

        File file = this.temp.newFile();
        // Remove only the placeholder file (not the whole temporary folder) so the writer can
        // create it, matching the other tests in this class.
        file.delete();
        Path path = new Path(file.getAbsolutePath());

        // The writer is fully closed before the reader opens, so a failed assertion cannot
        // trigger a second close of the writer.
        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(path)
                .withConf(conf)
                .withRowGroupSize(1)
                .withMinRowCountForPageSizeCheck(minRowCountForPageSizeCheck)
                .withMaxRowCountForPageSizeCheck(maxRowCountForPageSizeCheck)
                .build()) {
            SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
            writer.write(groupFactory.newGroup().append("str", "foo"));
            writer.write(groupFactory.newGroup().append("str", "bar"));
            writer.write(groupFactory.newGroup().append("str", "baz"));
        }

        try (ParquetFileReader reader = ParquetFileReader.open(HadoopInputFile.fromPath(path, conf))) {
            Assert.assertEquals(expectedNumberOfBlocks, reader.getFooter().getBlocks().size());
        }
    }
}
