/*
 * Decompiled with CFR 0.152.
 */
package org.deeplearning4j.spark.util;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import lombok.NonNull;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;
import org.datavec.spark.util.SerializableHadoopConfig;
import org.nd4j.common.loader.FileBatch;

public class SparkDataUtils {
    private SparkDataUtils() {
    }

    public static void createFileBatchesLocal(File inputDirectory, boolean recursive, File outputDirectory, int batchSize) throws IOException {
        SparkDataUtils.createFileBatchesLocal(inputDirectory, null, recursive, outputDirectory, batchSize);
    }

    public static void createFileBatchesLocal(File inputDirectory, String[] extensions, boolean recursive, File outputDirectory, int batchSize) throws IOException {
        if (!outputDirectory.exists()) {
            outputDirectory.mkdirs();
        }
        ArrayList c = new ArrayList(FileUtils.listFiles((File)inputDirectory, (String[])extensions, (boolean)recursive));
        Collections.shuffle(c);
        ArrayList<String> list = new ArrayList<String>();
        ArrayList<byte[]> bytes = new ArrayList<byte[]>();
        for (int i = 0; i < c.size(); ++i) {
            list.add(((File)c.get(i)).toURI().toString());
            bytes.add(FileUtils.readFileToByteArray((File)((File)c.get(i))));
            if (list.size() != batchSize) continue;
            SparkDataUtils.process(list, bytes, outputDirectory);
        }
        if (list.size() > 0) {
            SparkDataUtils.process(list, bytes, outputDirectory);
        }
    }

    private static void process(List<String> paths, List<byte[]> bytes, File outputDirectory) throws IOException {
        FileBatch fb = new FileBatch(bytes, paths);
        String name = UUID.randomUUID().toString().replaceAll("-", "") + ".zip";
        File f = new File(outputDirectory, name);
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(f));){
            fb.writeAsZip((OutputStream)bos);
        }
        paths.clear();
        bytes.clear();
    }

    public static void createFileBatchesSpark(JavaRDD<String> filePaths, String rootOutputDir, int batchSize, JavaSparkContext sc) {
        SparkDataUtils.createFileBatchesSpark(filePaths, rootOutputDir, batchSize, sc.hadoopConfiguration());
    }

    public static void createFileBatchesSpark(JavaRDD<String> filePaths, final String rootOutputDir, final int batchSize, @NonNull Configuration hadoopConfig) {
        if (hadoopConfig == null) {
            throw new NullPointerException("hadoopConfig is marked non-null but is null");
        }
        final SerializableHadoopConfig conf = new SerializableHadoopConfig(hadoopConfig);
        long count = filePaths.count();
        long maxPartitions = count / (long)batchSize;
        JavaRDD repartitioned = filePaths.repartition(Math.max(filePaths.getNumPartitions(), (int)maxPartitions));
        repartitioned.foreachPartition((VoidFunction)new VoidFunction<Iterator<String>>(){

            public void call(Iterator<String> stringIterator) throws Exception {
                ArrayList<String> list = new ArrayList<String>();
                ArrayList<byte[]> bytes = new ArrayList<byte[]>();
                FileSystem fs = FileSystem.get((Configuration)conf.getConfiguration());
                while (stringIterator.hasNext()) {
                    byte[] fileBytes;
                    String inFile = stringIterator.next();
                    try (BufferedInputStream bis = new BufferedInputStream((InputStream)fs.open(new Path(inFile)));){
                        fileBytes = IOUtils.toByteArray((InputStream)bis);
                    }
                    list.add(inFile);
                    bytes.add(fileBytes);
                    if (list.size() != batchSize) continue;
                    this.process(list, bytes);
                }
                if (list.size() > 0) {
                    this.process(list, bytes);
                }
            }

            private void process(List<String> paths, List<byte[]> bytes) throws IOException {
                FileBatch fb = new FileBatch(bytes, paths);
                String name = UUID.randomUUID().toString().replaceAll("-", "") + ".zip";
                String outPath = FilenameUtils.concat((String)rootOutputDir, (String)name);
                FileSystem fileSystem = FileSystem.get((Configuration)conf.getConfiguration());
                try (BufferedOutputStream bos = new BufferedOutputStream((OutputStream)fileSystem.create(new Path(outPath)));){
                    fb.writeAsZip((OutputStream)bos);
                }
                paths.clear();
                bytes.clear();
            }
        });
    }
}

