package it.unimi.dsi.big.io;

/*		 
 * DSI utilities
 *
 * Copyright (C) 2005-2014 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.Size64;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.SafelyCloseable;
import it.unimi.dsi.lang.MutableString;

import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.AbstractCollection;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;

/** A wrapper exhibiting the lines of a file as a {@link java.util.Collection}.
 * 
 * <P><strong>Warning</strong>: the lines returned by iterators generated by
 * instances of this class <em>are not cacheable</em>. The returned value is
 * a {@link it.unimi.dsi.lang.MutableString} instance that is reused
 * at each call, and that is <em>modified by a call to {@link java.util.Iterator#hasNext() hasNext()}</em>. 
 * Thus, for instance,
 * <pre>
 *    ObjectIterators.unwrap( fileLinesCollection.iterator() );
 * </pre>
 * will not give the expected results. Use {@link #allLines()} to get
 * the {@linkplain ObjectBigList big list} of all lines (again, under the form of compact {@link it.unimi.dsi.lang.MutableString}s).
 * Note also that {@link #toString()} will return a single string containing all
 * file lines separated by the string associated with the system property <samp>line.separator</samp>.
 * 
 * <P>An instance of this class allows to access the lines of a file as a
 * {@link java.util.Collection}. Using {@linkplain java.util.Collection#contains(java.lang.Object)
 * direct access} is strongly discouraged (it will require a full scan of the file), but
 * the {@link #iterator()} can be fruitfully used to scan the file, and can be called any
 * number of times, as it opens an independent input stream at each call. For the
 * same reason, the returned iterator type ({@link FileLinesCollection.FileLinesIterator})
 * is {@link java.io.Closeable}, and should be closed after usage.
 * 
 * <p>Using a suitable {@linkplain #FileLinesCollection(CharSequence, String, boolean) constructor}, it is possible
 * to specify that the file is compressed in <samp>gzip</samp> format (in this case, it will be opened using a {@link GZIPInputStream}).
 * 
 * <P>Note that the first call to {@link #size64()} will require a full file scan.  
 * 
 * @author Sebastiano Vigna
 * @since 2.0
 */
public class FileLinesCollection extends AbstractCollection<MutableString> implements Size64 {
	/** The filename upon which this file-lines collection is based. */
	private final String filename;
	/** The encoding of {@link #filename}, or {@code null} for the standard platform encoding. */
	private final String encoding;
	/** The cached number of lines, or -1 if it has not been computed yet. */
	private long size = -1;
	/** Whether {@link #filename} is compressed in <samp>gzip</samp> format. */
	private final boolean zipped;
	
	/** Creates a file-lines collection for the specified filename with the specified encoding.
	 * 
	 * @param filename a filename.
	 * @param encoding an encoding, or {@code null} for the standard platform encoding.
	 */		
	public FileLinesCollection( final CharSequence filename, final String encoding ) {
		this( filename, encoding, false );
	}

	/** Creates a file-lines collection for the specified filename with the specified encoding, optionally assuming
	 * that the file is compressed using <samp>gzip</samp> format.
	 * 
	 * @param filename a filename.
	 * @param encoding an encoding, or {@code null} for the standard platform encoding.
	 * @param zipped whether <samp>filename</samp> is zipped.
	 */		
	public FileLinesCollection( final CharSequence filename, final String encoding, final boolean zipped ) {
		this.zipped = zipped;
		this.filename = filename.toString();
		this.encoding = encoding;
	}

	
	/** An iterator over the lines of a {@link FileLinesCollection}. 
	 * 
	 * <p>Instances of this class open an {@link java.io.InputStream}, and thus should be {@linkplain Closeable#close() closed} after
	 * usage. A &ldquo;safety-net&rdquo; finaliser tries to take care of the cases in which
	 * closing an instance is impossible. An exhausted iterator, however, will be closed automatically.
	 * 
	 * <p>The {@link MutableString} returned by {@link #next()} is always the same instance, and its
	 * content is overwritten when {@link #hasNext()} reads the following line.
	 */
	
	public static final class FileLinesIterator implements Iterator<MutableString>, SafelyCloseable {
		/** The underlying reader, or {@code null} once this iterator has been closed. */
		private FastBufferedReader fbr;
		/** The reusable buffer into which each line is read. */
		MutableString s = new MutableString();
		/** The result of the last read: {@code null} if no further line is available. */
		MutableString next;
		/** Whether a new line must be read by the next call to {@link #hasNext()}. */
		boolean toAdvance = true;

		/** Opens the given file for line-by-line reading.
		 * 
		 * @param filename the name of the file to read.
		 * @param encoding the encoding of the file, or {@code null} for the standard platform encoding.
		 * @param zipped whether the file is compressed in <samp>gzip</samp> format.
		 * @throws RuntimeException wrapping the {@link IOException} raised while opening the file.
		 */
		private FileLinesIterator( final String filename, final String encoding, final boolean zipped ) {
			FileInputStream file = null;
			try {
				file = new FileInputStream( filename );
				// Decompression must not depend on whether an encoding was specified.
				java.io.InputStream in = file;
				if ( zipped ) in = new GZIPInputStream( file );
				fbr = new FastBufferedReader( encoding != null ? new InputStreamReader( in, encoding ) : new InputStreamReader( in ) );
			}
			catch ( IOException e ) {
				// A bad gzip header or an unsupported encoding must not leak the already opened file.
				if ( file != null ) {
					try {
						file.close();
					}
					catch ( IOException ignored ) {
						// The original failure is the one worth reporting.
					}
				}
				throw new RuntimeException( e );
			}
		}

		/** Checks whether another line is available, reading it into the shared buffer if necessary.
		 * 
		 * <p>When the end of the file is reached, this iterator closes itself. A closed iterator
		 * that needs to read a new line simply reports that no line is available. 
		 * 
		 * @return true if a call to {@link #next()} will return a line.
		 * @throws RuntimeException wrapping any {@link IOException} raised while reading. 
		 */
		public boolean hasNext() {
			if ( toAdvance ) {
				if ( fbr == null ) next = null; // Closed before exhaustion: nothing more can be read.
				else {
					try {
						next = fbr.readLine( s );
						if ( next == null ) close();
					} 
					catch ( IOException e ) {
						throw new RuntimeException( e );
					}
				}
				toAdvance = false;
			}
			
			return next != null;
		}

		/** Returns the next line. 
		 * 
		 * @return the next line, stored in a buffer that is reused at each call.
		 * @throws NoSuchElementException if there are no more lines.
		 */
		public MutableString next() {
			if ( ! hasNext() ) throw new NoSuchElementException();
			toAdvance = true;
			return s;
		}

		/** Always throws an {@link UnsupportedOperationException}. */
		public void remove() {
			throw new UnsupportedOperationException();
		}
		
		/** Closes the underlying reader; further calls have no effect.
		 * 
		 * @throws RuntimeException wrapping any {@link IOException} raised while closing. 
		 */
		public synchronized void close() {
			if ( fbr == null ) return;
			try {
				fbr.close();
			}
			catch ( IOException e ) {
				throw new RuntimeException( e );
			}
			finally {
				// The reader is dropped even if closing failed, so that we never try twice.
				fbr = null;
			}
		}
		
		/** Safety net closing the underlying reader if this iterator was never closed. */
		@Override
		protected synchronized void finalize() throws Throwable {
			try {
				if ( fbr != null ) close();
			}
			finally {
				super.finalize();
			}
		}

	}

	/** Returns a new iterator over the lines of the file; each call opens an independent input stream.
	 * 
	 * @return a new iterator over the lines of the file, which should be closed after usage.
	 */
	@Override
	public FileLinesIterator iterator() {
		return new FileLinesIterator( filename, encoding, zipped );
	}
		
	/** Returns the number of lines of the file, capped at {@link Integer#MAX_VALUE}.
	 * 
	 * <p>The first call to this method or to {@link #size64()} requires a full file scan.
	 * 
	 * @return the number of lines of the file, or {@link Integer#MAX_VALUE} if there are more.
	 */
	@Override
	public synchronized int size() {
		// Going through size64() guarantees that the count has actually been computed.
		return (int)Math.min( Integer.MAX_VALUE, size64() );
	}	

	/** Returns the number of lines of the file, scanning it on the first call and caching the result.
	 * 
	 * @return the number of lines of the file.
	 */
	public synchronized long size64() {
		if ( size == -1 ) {
			final FileLinesIterator i = iterator();
			// Count locally so that a failed scan does not leave a partial count in the cache.
			long count = 0;
			try {
				while( i.hasNext() ) {
					count++;
					i.next();
				}
			}
			finally {
				i.close();
			}
			size = count;
		}
		return size;
	}
		
	/** Returns all lines of the file wrapped by this file-lines collection.
	 * 
	 * <p>Each element of the result is an independent copy, so (contrarily to what happens
	 * with the lines returned by {@link #iterator()}) it can be safely stored.
	 * 
	 * @return all lines of the file wrapped by this file-lines collection.
	 */
	
	public ObjectBigList<MutableString> allLines() {
		final ObjectBigList<MutableString> result = new ObjectBigArrayBigList<MutableString>();
		final FileLinesIterator i = iterator();
		try {
			while( i.hasNext() ) result.add( i.next().copy() );
		}
		finally {
			i.close();
		}
		return result;
	}

	/** Always throws an {@link UnsupportedOperationException}; use {@link #allLines()} instead. */
	@Override
	public Object[] toArray() {
		throw new UnsupportedOperationException( "Use allLines()" );
	}

	/** Always throws an {@link UnsupportedOperationException}; use {@link #allLines()} instead. */
	@Override
	public <T> T[] toArray( T[] a ) {
		throw new UnsupportedOperationException( "Use allLines()" );
	}

	/** Returns all lines of the file, each followed by the value of the system property <samp>line.separator</samp>.
	 * 
	 * @return a single string containing all lines of the file.
	 */
	@Override
	public String toString() {
		final MutableString separator = new MutableString( System.getProperty( "line.separator" ) );
		final MutableString s = new MutableString();
		final FileLinesIterator i = iterator();
		try {
			while( i.hasNext() ) s.append( i.next() ).append( separator );
		}
		finally {
			i.close();
		}
		return s.toString();
	}
}
