/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.sux4j.mph;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import com.martiansoftware.jsap.stringparsers.FileStringParser;
import com.martiansoftware.jsap.stringparsers.ForNameStringParser;
import it.unimi.dsi.bits.BitVector;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.bits.HuTuckerTransformationStrategy;
import it.unimi.dsi.bits.TransformationStrategies;
import it.unimi.dsi.bits.TransformationStrategy;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.AbstractLongBigList;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.longs.LongIterable;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.FileLinesCollection;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.logging.ProgressLogger;
import it.unimi.dsi.sux4j.io.BucketedHashStore;
import it.unimi.dsi.sux4j.mph.AbstractHashFunction;
import it.unimi.dsi.sux4j.mph.GOV3Function;
import it.unimi.dsi.sux4j.mph.Hashes;
import it.unimi.dsi.sux4j.mph.ZFastTrieDistributor;
import it.unimi.dsi.util.XoRoShiRo128PlusRandomGenerator;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Serializable;
import java.nio.charset.Charset;
import java.util.zip.GZIPInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<T>
extends AbstractHashFunction<T>
implements Serializable {
    public static final long serialVersionUID = 5L;
    private static final Logger LOGGER = LoggerFactory.getLogger(ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class);
    private final long size;
    private final int log2BucketSize;
    private final TransformationStrategy<? super T> transform;
    private final ZFastTrieDistributor<BitVector> distributor;
    private final GOV3Function<BitVector> offset;
    private long seed;
    protected final long signatureMask;
    protected final LongBigList signatures;

    protected ZFastTrieDistributorMonotoneMinimalPerfectHashFunction(Iterable<? extends T> keys, TransformationStrategy<? super T> transform, int log2BucketSize, int signatureWidth, File tempDir) throws IOException {
        this.transform = transform;
        this.defRetValue = -1L;
        long maxLength = 0L;
        long totalLength = 0L;
        XoRoShiRo128PlusRandomGenerator r = new XoRoShiRo128PlusRandomGenerator();
        BucketedHashStore<BitVector> bucketedHashStore = new BucketedHashStore<BitVector>(TransformationStrategies.identity(), tempDir);
        bucketedHashStore.reset(r.nextLong());
        Iterable bitVectors = TransformationStrategies.wrap(keys, transform);
        ProgressLogger pl = new ProgressLogger(LOGGER);
        pl.displayLocalSpeed = true;
        pl.displayFreeMemory = true;
        pl.itemsName = "keys";
        pl.start((CharSequence)"Scanning collection...");
        for (BitVector bv : bitVectors) {
            maxLength = Math.max(maxLength, bv.length());
            totalLength += bv.length();
            bucketedHashStore.add(bv);
            pl.lightUpdate();
        }
        pl.done();
        bucketedHashStore.checkAndRetry(bitVectors);
        this.size = bucketedHashStore.size();
        if (this.size == 0L) {
            this.log2BucketSize = -1;
            this.distributor = null;
            this.offset = null;
            this.signatureMask = 0L;
            this.signatures = null;
            bucketedHashStore.close();
            return;
        }
        long averageLength = (totalLength + this.size - 1L) / this.size;
        long forecastBucketSize = (long)Math.ceil(10.5 + 4.05 * Math.log(averageLength) + 2.43 * Math.log(Math.log(this.size) + 1.0) + 2.43 * Math.log(Math.log(averageLength) + 1.0));
        this.log2BucketSize = log2BucketSize == -1 ? Fast.mostSignificantBit((long)forecastBucketSize) : log2BucketSize;
        LOGGER.debug("Average length: " + averageLength);
        LOGGER.debug("Max length: " + maxLength);
        LOGGER.debug("Bucket size: " + (1L << this.log2BucketSize));
        LOGGER.info("Computing z-fast trie distributor...");
        this.distributor = new ZFastTrieDistributor(bitVectors, this.log2BucketSize, TransformationStrategies.identity(), bucketedHashStore);
        LOGGER.info("Computing offsets...");
        this.offset = new GOV3Function.Builder<BitVector>().store(bucketedHashStore).values((LongIterable)new AbstractLongBigList(){
            final long bucketSizeMask;
            {
                this.bucketSizeMask = (1L << ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.this.log2BucketSize) - 1L;
            }

            public long getLong(long index) {
                return index & this.bucketSizeMask;
            }

            public long size64() {
                return ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.this.size;
            }
        }, this.log2BucketSize).indirect().build();
        this.seed = bucketedHashStore.seed();
        double logU = (double)averageLength * Math.log(2.0);
        LOGGER.info("Forecast bit cost per element: " + 1.0 / (double)forecastBucketSize * (-6.0 * Fast.log2((double)Math.log(2.0)) + 5.0 * Fast.log2((double)logU) + 2.0 * Fast.log2((double)forecastBucketSize) + Fast.log2((double)(Math.log(logU) - Math.log(Math.log(2.0)))) + 6.0 * GOV3Function.C + 3.0 * Fast.log2((double)Math.E) + 3.0 * Fast.log2((double)Math.log(3.0 * (double)this.size)) + 3.0 + GOV3Function.C * (double)forecastBucketSize + GOV3Function.C * (double)forecastBucketSize * Fast.log2((double)forecastBucketSize)));
        LOGGER.info("Actual bit cost per element: " + (double)this.numBits() / (double)this.size);
        if (signatureWidth != 0) {
            this.signatureMask = -1L >>> -signatureWidth;
            this.signatures = bucketedHashStore.signatures(signatureWidth, pl);
        } else {
            this.signatureMask = 0L;
            this.signatures = null;
        }
        bucketedHashStore.close();
    }

    public long getLong(Object o) {
        if (this.size == 0L) {
            return this.defRetValue;
        }
        BitVector bv = this.transform.toBitVector(o).fast();
        long[] state = Hashes.preprocessSpooky4(bv, this.seed);
        long[] signature = new long[2];
        Hashes.spooky4(bv, bv.length(), this.seed, state, signature);
        long bucket = this.distributor.getLongByBitVectorSignatureAndState(bv, signature, state);
        long result = (bucket << this.log2BucketSize) + this.offset.getLongBySignature(signature);
        if (this.signatureMask != 0L) {
            return result < 0L || result >= this.size || this.signatures.getLong(result) != (signature[0] & this.signatureMask) ? this.defRetValue : result;
        }
        return result < 0L || result >= this.size ? this.defRetValue : result;
    }

    @Override
    public long size64() {
        return this.size;
    }

    public long numBits() {
        if (this.size == 0L) {
            return 0L;
        }
        return this.distributor.numBits() + this.offset.numBits() + this.transform.numBits();
    }

    public static void main(String[] arg) throws NoSuchMethodException, IOException, JSAPException {
        Object collection;
        SimpleJSAP jsap = new SimpleJSAP(ZFastTrieDistributorMonotoneMinimalPerfectHashFunction.class.getName(), "Builds a monotone minimal perfect hash using a probabilistic z-fast trie as a distributor reading a newline-separated list of strings.", new Parameter[]{new FlaggedOption("encoding", (StringParser)ForNameStringParser.getParser(Charset.class), "UTF-8", false, 'e', "encoding", "The string file encoding."), new FlaggedOption("tempDir", (StringParser)FileStringParser.getParser(), JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for temporary files."), new Switch("huTucker", 'h', "hu-tucker", "Use Hu-Tucker coding to reduce string length."), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new Switch("utf32", '\u0000', "utf-32", "Use UTF-32 internally (handles surrogate pairs)."), new FlaggedOption("signatureWidth", (StringParser)JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, false, 's', "signature-width", "If specified, the signature width in bits; if negative, the generated function will be a dictionary."), new Switch("zipped", 'z', "zipped", "The string list is compressed in gzip format."), new FlaggedOption("log2bucket", (StringParser)JSAP.INTEGER_PARSER, "-1", false, 'b', "log2bucket", "The base 2 logarithm of the bucket size (mainly for testing)."), new UnflaggedOption("function", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, true, false, "The filename for the serialised monotone minimal perfect hash function."), new UnflaggedOption("stringFile", (StringParser)JSAP.STRING_PARSER, "-", false, false, "The name of a file containing a newline-separated list of strings, or - for standard input; in the first case, strings will not be loaded into core memory.")});
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        String functionName = jsapResult.getString("function");
        String stringFile = jsapResult.getString("stringFile");
        int log2BucketSize = jsapResult.getInt("log2bucket");
        Charset encoding = (Charset)jsapResult.getObject("encoding");
        File tempDir = jsapResult.getFile("tempDir");
        boolean zipped = jsapResult.getBoolean("zipped");
        boolean iso = jsapResult.getBoolean("iso");
        boolean utf32 = jsapResult.getBoolean("utf32");
        boolean huTucker = jsapResult.getBoolean("huTucker");
        int signatureWidth = jsapResult.getInt("signatureWidth", 0);
        if ("-".equals(stringFile)) {
            ProgressLogger pl = new ProgressLogger(LOGGER);
            pl.displayLocalSpeed = true;
            pl.displayFreeMemory = true;
            pl.start((CharSequence)"Loading strings...");
            collection = new LineIterator(new FastBufferedReader((Reader)new InputStreamReader(zipped ? new GZIPInputStream(System.in) : System.in, encoding)), pl).allLines();
            pl.done();
        } else {
            collection = new FileLinesCollection((CharSequence)stringFile, encoding.toString(), zipped);
        }
        HuTuckerTransformationStrategy transformationStrategy = huTucker ? new HuTuckerTransformationStrategy((Iterable)collection, true) : (iso ? TransformationStrategies.prefixFreeIso() : (utf32 ? TransformationStrategies.prefixFreeUtf32() : TransformationStrategies.prefixFreeUtf16()));
        BinIO.storeObject(new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction(collection, transformationStrategy, log2BucketSize, signatureWidth, tempDir), (CharSequence)functionName);
        LOGGER.info("Completed.");
    }

    public static class Builder<T> {
        protected Iterable<? extends T> keys;
        protected TransformationStrategy<? super T> transform;
        protected long numKeys = -1L;
        protected int signatureWidth;
        protected File tempDir;
        protected boolean built;

        public Builder<T> keys(Iterable<? extends T> keys) {
            this.keys = keys;
            return this;
        }

        public Builder<T> transform(TransformationStrategy<? super T> transform) {
            this.transform = transform;
            return this;
        }

        public Builder<T> signed(int signatureWidth) {
            this.signatureWidth = signatureWidth;
            return this;
        }

        public Builder<T> tempDir(File tempDir) {
            this.tempDir = tempDir;
            return this;
        }

        public ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<T> build() throws IOException {
            if (this.built) {
                throw new IllegalStateException("This builder has been already used");
            }
            this.built = true;
            return new ZFastTrieDistributorMonotoneMinimalPerfectHashFunction<T>(this.keys, this.transform, -1, this.signatureWidth, this.tempDir);
        }
    }
}

