/*
 * Copyright 2015, Yahoo! Inc.
 * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms.
 */
package com.yahoo.sketches.hash;

import java.io.Serializable;

/**
 * <p>
 * The MurmurHash3 is a fast, non-cryptographic, 128-bit hash function that has
 * excellent avalanche and 2-way bit independence properties.
 * </p>
 * 
 * <p>
 * The C++ MurmurHash3_x64_128(...), revision 150, of the MurmurHash3, written by Austin Appleby, 
 * which is in the Public Domain, was the inspiration for this implementation in Java. 
 * The C++ version can be found at 
 * <a href= "http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp"> SMHasher &amp;
 * MurmurHash</a>.
 * </p>
 * 
 * <p>
 * This Java implementation pays close attention to the C++ algorithms in order to
 * maintain bit-wise compatibility, but the design is quite different. This implementation has also
 * been extended to include processing of arrays of longs or ints, which was not part of the
 * original C++ implementation. This implementation produces the same exact output hash bits as
 * the above C++ method given the same input. Byte arrays that are a multiple of 8 bytes in length
 * will produce the same output hash as the equivalent long array in little-endian byte order.
 * </p>
 * 
 * <p>
 * The structure of this implementation also reflects a separation of code that is dependent on the
 * input structure (in this case byte[], int[] or long[]) from code that is independent of the input
 * structure. This also makes the code more readable and suitable for future extensions.
 * </p>
 * 
 * @author Lee Rhodes
 */
public final class MurmurHash3 implements Serializable {
  private static final long serialVersionUID = 0L;

  // Static utility class: not instantiable.
  private MurmurHash3() {}

  //--Hash of long[]----------------------------------------------------
  /**
   * Returns a long array of size 2, which is a 128-bit hash of the input.
   *
   * @param key The input long[] array. Must be non-null. Callers are expected to pass a non-empty
   * array; an empty array is not rejected and is hashed as a zero-length input.
   * @param seed A long valued seed. It initializes both 64-bit halves of the hash state.
   * @return the hash as {h1, h2}.
   * @throws NullPointerException if key is null.
   */
  public static long[] hash(long[] key, long seed) {
    final HashState hashState = new HashState(seed, seed);
    final int longs = key.length; //in longs

    // Number of full 128-bit blocks of 2 longs (the body).
    // Possible exclusion of a remainder of 1 long.
    final int nblocks = longs >> 1; //longs / 2

    // Process the 128-bit blocks (the body) into the hash
    for (int i = 0; i < nblocks; i++) {
      final long k1 = key[2 * i]; //0, 2, 4, ...
      final long k2 = key[(2 * i) + 1]; //1, 3, 5, ...
      hashState.blockMix128(k1, k2);
    }

    // Get the tail index, remainder length
    final int tail = nblocks * 2; // 2 longs / block
    final int rem = longs - tail; // remainder longs: 0,1

    // Get the tail
    final long k1 = (rem == 0) ? 0 : key[tail]; //k2 -> 0

    // Convert the length to bytes. Widen to long BEFORE scaling: (longs * 8) evaluated in int
    // arithmetic wraps around for arrays of 2^28 or more longs.
    final long lengthBytes = ((long) longs) << 3;

    // Mix the tail into the hash and return
    return hashState.finalMix128(k1, 0, lengthBytes);
  }

  //--Hash of int[]----------------------------------------------------
  /**
   * Returns a long array of size 2, which is a 128-bit hash of the input.
   *
   * @param key The input int[] array. Must be non-null. Callers are expected to pass a non-empty
   * array; an empty array is not rejected and is hashed as a zero-length input.
   * @param seed A long valued seed. It initializes both 64-bit halves of the hash state.
   * @return the hash as {h1, h2}.
   * @throws NullPointerException if key is null.
   */
  public static long[] hash(int[] key, long seed) {
    final HashState hashState = new HashState(seed, seed);
    final int ints = key.length; //in ints

    // Number of full 128-bit blocks of 4 ints.
    // Possible exclusion of a remainder of up to 3 ints.
    final int nblocks = ints >> 2; //ints / 4

    // Process the 128-bit blocks (the body) into the hash
    for (int i = 0; i < nblocks; i++) { //4 ints per block
      final long k1 = getLong(key, 4 * i, 2); //0, 4, 8, ...
      final long k2 = getLong(key, (4 * i) + 2, 2); //2, 6, 10, ...
      hashState.blockMix128(k1, k2);
    }

    // Get the tail index, remainder length
    final int tail = nblocks * 4; // 4 ints per block
    final int rem = ints - tail; // remainder ints: 0,1,2,3

    // Get the tail
    long k1 = 0;
    long k2 = 0;
    if (rem > 2) { //k1 -> whole; k2 -> partial
      k1 = getLong(key, tail, 2);
      k2 = getLong(key, tail + 2, rem - 2);
    }
    else { //k1 -> whole, partial or 0; k2 == 0
      k1 = (rem == 0) ? 0 : getLong(key, tail, rem);
    }

    // Convert the length to bytes. Widen to long BEFORE scaling: (ints * 4) evaluated in int
    // arithmetic wraps around for arrays of 2^29 or more ints.
    final long lengthBytes = ((long) ints) << 2;

    // Mix the tail into the hash and return
    return hashState.finalMix128(k1, k2, lengthBytes);
  }

  //--Hash of byte[]----------------------------------------------------
  /**
   * Returns a long array of size 2, which is a 128-bit hash of the input.
   *
   * @param key The input byte[] array. Must be non-null. Callers are expected to pass a non-empty
   * array; an empty array is not rejected and is hashed as a zero-length input.
   * @param seed A long valued seed. It initializes both 64-bit halves of the hash state.
   * @return the hash as {h1, h2}.
   * @throws NullPointerException if key is null.
   */
  public static long[] hash(byte[] key, long seed) {
    final HashState hashState = new HashState(seed, seed);
    final int bytes = key.length; //in bytes

    // Number of full 128-bit blocks of 16 bytes.
    // Possible exclusion of a remainder of up to 15 bytes.
    final int nblocks = bytes >> 4; //bytes / 16

    // Process the 128-bit blocks (the body) into the hash
    for (int i = 0; i < nblocks; i++) { //16 bytes per block
      final long k1 = getLong(key, 16 * i, 8); //0, 16, 32, ...
      final long k2 = getLong(key, (16 * i) + 8, 8); //8, 24, 40, ...
      hashState.blockMix128(k1, k2);
    }

    // Get the tail index, remainder length
    final int tail = nblocks * 16; //16 bytes per block
    final int rem = bytes - tail; // remainder bytes: 0,1,...,15

    // Get the tail
    long k1 = 0;
    long k2 = 0;
    if (rem > 8) { //k1 -> whole; k2 -> partial
      k1 = getLong(key, tail, 8);
      k2 = getLong(key, tail + 8, rem - 8);
    }
    else { //k1 -> whole, partial or 0; k2 == 0
      k1 = (rem == 0) ? 0 : getLong(key, tail, rem);
    }

    // Mix the tail into the hash and return. The length is already in bytes.
    return hashState.finalMix128(k1, k2, bytes);
  }

  //--HashState class---------------------------------------------------
  /**
   * Common processing of the 128-bit hash state independent of input type.
   * The state is two 64-bit words, h1 and h2, which are mutated by each mix call.
   */
  private static final class HashState {
    // Multiplicative mixing constants (c1 and c2 of the referenced C++ MurmurHash3_x64_128).
    private static final long C1 = 0x87c37b91114253d5L;
    private static final long C2 = 0x4cf5ad432745937fL;
    private long h1;
    private long h2;

    /**
     * Creates a hash state with the given initial values.
     *
     * @param h1 initial value of the first 64-bit state word
     * @param h2 initial value of the second 64-bit state word
     */
    HashState(long h1, long h2) {
      this.h1 = h1;
      this.h2 = h2;
    }

    /**
     * Block mix (128-bit block) of input key to internal hash state.
     *
     * @param k1 intermediate mix value
     * @param k2 intermediate mix value
     */
    void blockMix128(long k1, long k2) {
      h1 ^= mixK1(k1);
      h1 = Long.rotateLeft(h1, 27);
      h1 += h2;
      h1 = (h1 * 5) + 0x52dce729;

      h2 ^= mixK2(k2);
      h2 = Long.rotateLeft(h2, 31);
      h2 += h1;
      h2 = (h2 * 5) + 0x38495ab5;
    }

    /**
     * Final mix: folds the tail words and the input length into the internal hash state, applies
     * the final self mix to both state words and returns the result. This modifies the internal
     * state.
     *
     * @param k1 first tail word; zero if there is no tail
     * @param k2 second tail word; zero if the tail does not extend into it
     * @param inputLengthBytes length of the entire input in bytes
     * @return a new long array of size 2 holding {h1, h2}
     */
    long[] finalMix128(long k1, long k2, long inputLengthBytes) {
      h1 ^= mixK1(k1);
      h2 ^= mixK2(k2);
      h1 ^= inputLengthBytes;
      h2 ^= inputLengthBytes;
      h1 += h2;
      h2 += h1;
      h1 = finalMix64(h1);
      h2 = finalMix64(h2);
      h1 += h2;
      h2 += h1;
      return (new long[] { h1, h2 });
    }

    /**
     * Final self mix of h*.
     *
     * @param h input to final mix
     * @return mix
     */
    private static long finalMix64(long h) {
      h ^= h >>> 33;
      h *= 0xff51afd7ed558ccdL;
      h ^= h >>> 33;
      h *= 0xc4ceb9fe1a85ec53L;
      h ^= h >>> 33;
      return h;
    }

    /**
     * Self mix of k1
     *
     * @param k1 input argument
     * @return mix
     */
    private static long mixK1(long k1) {
      k1 *= C1;
      k1 = Long.rotateLeft(k1, 31);
      k1 *= C2;
      return k1;
    }

    /**
     * Self mix of k2
     *
     * @param k2 input argument
     * @return mix
     */
    private static long mixK2(long k2) {
      k2 *= C2;
      k2 = Long.rotateLeft(k2, 33);
      k2 *= C1;
      return k2;
    }
  }

  //--Helper methods----------------------------------------------------
  /**
   * Gets a long from the given byte array starting at the given byte array index and continuing for
   * remainder (rem) bytes. The bytes are extracted in little-endian order. There is no limit
   * checking. Bytes of the result beyond rem are zero.
   *
   * @param bArr The given input byte array.
   * @param index Zero-based index from the start of the byte array.
   * @param rem Remainder bytes. An integer in the range [1,8].
   * @return long
   */
  private static long getLong(byte[] bArr, int index, int rem) {
    long out = 0L;
    for (int i = rem; i-- > 0;) { //i = rem-1, ..., 1, 0
      final byte b = bArr[index + i];
      // Mask to drop the sign extension; each byte lands in its own lane, so ^= is equivalent to |=
      out ^= (b & 0xFFL) << (i * 8);
    }
    return out;
  }

  /**
   * Gets a long from the given int array starting at the given int array index and continuing for
   * remainder (rem) integers. The integers are extracted in little-endian order. There is no limit
   * checking. If rem is 1 the upper 32 bits of the result are zero.
   *
   * @param intArr The given input int array.
   * @param index Zero-based index from the start of the int array.
   * @param rem Remainder integers. An integer in the range [1,2].
   * @return long
   */
  private static long getLong(int[] intArr, int index, int rem) {
    long out = 0L;
    for (int i = rem; i-- > 0;) { //i = rem-1, ..., 0
      final int v = intArr[index + i];
      // Mask to drop the sign extension; each int lands in its own lane, so ^= is equivalent to |=
      out ^= (v & 0xFFFFFFFFL) << (i * 32);
    }
    return out;
  }
}