package edu.umd.cloud9.webgraph.driver;

import edu.umd.cloud9.io.FSLineReader;
import edu.umd.cloud9.io.array.ArrayListWritable;
import edu.umd.cloud9.mapred.NoSplitSequenceFileInputFormat;
import edu.umd.cloud9.webgraph.data.AnchorText;
import edu.umd.cloud9.webgraph.data.IndexableAnchorTextForwardIndex;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapRunnable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.IdentityReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

/* loaded from: input_file:edu/umd/cloud9/webgraph/driver/BuildAnchorTextForwardIndex.class */
public class BuildAnchorTextForwardIndex extends Configured implements Tool {
    private static final Logger LOG = Logger.getLogger(BuildAnchorTextForwardIndex.class);

    /* loaded from: input_file:edu/umd/cloud9/webgraph/driver/BuildAnchorTextForwardIndex$Blocks.class */
    /**
     * Counter names reported by this job.
     */
    private enum Blocks {
        /** Incremented by one for every index entry the map runner emits. */
        Total;

        /**
         * Returns all constants of this enum in declaration order.
         *
         * <p>This is the stand-in for the built-in {@code values()} method under a
         * different name (renamed to avoid clashing with the enum method).
         *
         * @return a newly allocated array; modifying it does not affect later calls
         */
        public static Blocks[] valuesCustom() {
            return values().clone();
        }
    }

    /* loaded from: input_file:edu/umd/cloud9/webgraph/driver/BuildAnchorTextForwardIndex$MyMapRunner.class */
    private static class MyMapRunner implements MapRunnable<IntWritable, ArrayListWritable<AnchorText>, IntWritable, Text> {
        private static int fileno;
        private static final IntWritable sOutputKey = new IntWritable();
        private static final Text sOutputValue = new Text();

        private MyMapRunner() {
        }

        public void configure(JobConf jobConf) {
            String str = jobConf.get("map.input.file");
            fileno = Integer.parseInt(str.substring(str.indexOf("part-") + 5));
        }

        public void run(RecordReader<IntWritable, ArrayListWritable<AnchorText>> recordReader, OutputCollector<IntWritable, Text> outputCollector, Reporter reporter) throws IOException {
            IntWritable intWritable = new IntWritable();
            ArrayListWritable arrayListWritable = new ArrayListWritable();
            long j = -1;
            int i = 0;
            long pos = recordReader.getPos();
            while (recordReader.next(intWritable, arrayListWritable)) {
                if (j != -1 && j != pos) {
                    BuildAnchorTextForwardIndex.LOG.info("- beginning of block at " + j + ", docno:" + i + ", file:" + fileno);
                    sOutputKey.set(i);
                    sOutputValue.set(String.valueOf(j) + "\t" + fileno);
                    outputCollector.collect(sOutputKey, sOutputValue);
                    reporter.incrCounter(Blocks.Total, 1L);
                }
                j = pos;
                pos = recordReader.getPos();
                i = intWritable.get();
            }
        }
    }

    /**
     * Prints the expected command-line arguments followed by Hadoop's generic
     * option help to standard output.
     *
     * @return always {@code -1}, suitable for use as a failure exit code
     */
    private static int printUsage() {
        final String usage = "usage: [collection-path] [output-path] [index-file]";
        System.out.println(usage);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    /**
     * Runs the MapReduce job that locates index entries in the collection and then
     * converts the job's text output into a binary index file.
     *
     * <p>Expected arguments, in order: collection path, job output path (deleted
     * recursively before the job starts), and index file path (overwritten).
     *
     * @param strArr command-line arguments remaining after generic option parsing
     * @return {@code 0} on success, {@code -1} if the argument count is wrong
     * @throws RuntimeException if the number of index entries written differs from
     *         the {@code Blocks.Total} counter reported by the job
     * @throws Exception if the job or any file system operation fails
     */
    @Override
    public int run(String[] strArr) throws Exception {
        if (strArr.length != 3) {
            printUsage();
            return -1;
        }
        String collectionPath = strArr[0];
        String outputPath = strArr[1];
        String indexFile = strArr[2];

        // Build on the configuration injected by ToolRunner so that generic options
        // (-D, -conf, -fs, ...) take effect; fall back to a default configuration
        // when run() is invoked without one having been set.
        Configuration baseConf = getConf();
        JobConf jobConf = baseConf == null
                ? new JobConf(BuildAnchorTextForwardIndex.class)
                : new JobConf(baseConf, BuildAnchorTextForwardIndex.class);
        FileSystem fileSystem = FileSystem.get(jobConf);

        LOG.info("Tool name: BuildAnchorTextForwardIndex");
        LOG.info(" - collection path: " + collectionPath);
        LOG.info(" - output path: " + outputPath);
        LOG.info(" - index file: " + indexFile);
        LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

        jobConf.set("mapred.child.java.opts", "-Xmx2048m");
        jobConf.setJobName("BuildAnchorTextForwardIndex");
        jobConf.setNumMapTasks(100);
        // A single reducer so that all entries end up in one output file, part-00000.
        jobConf.setNumReduceTasks(1);
        FileInputFormat.setInputPaths(jobConf, new Path(collectionPath));
        FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
        FileOutputFormat.setCompressOutput(jobConf, false);
        jobConf.setInputFormat(NoSplitSequenceFileInputFormat.class);
        jobConf.setOutputKeyClass(IntWritable.class);
        jobConf.setOutputValueClass(Text.class);
        jobConf.setMapRunnerClass(MyMapRunner.class);
        jobConf.setReducerClass(IdentityReducer.class);

        // Remove any previous output so the job does not fail on an existing directory.
        fileSystem.delete(new Path(outputPath), true);

        int numBlocks = (int) JobClient.runJob(jobConf).getCounters().findCounter(Blocks.Total).getCounter();
        LOG.info("number of blocks: " + numBlocks);

        LOG.info("Writing index file...");
        int written = writeIndexFile(fileSystem, outputPath + "/part-00000", collectionPath, indexFile, numBlocks);
        if (written != numBlocks) {
            throw new RuntimeException("Error: mismatch in block count!");
        }
        return 0;
    }

    /**
     * Converts the job's text output into the binary index file.
     *
     * <p>The file starts with the class name of
     * {@code IndexableAnchorTextForwardIndex}, the collection path and the entry
     * count, followed by one (int docno, int position, short fileno) triple per
     * input line.
     *
     * @param fileSystem file system holding both the job output and the index file
     * @param jobOutputFile path of the job's text output, one entry per line
     * @param collectionPath collection path recorded in the index header
     * @param indexFile destination path; overwritten if it exists
     * @param numBlocks entry count recorded in the index header
     * @return the number of entries actually written
     * @throws IOException if reading or writing fails, or a line has fewer than
     *         three whitespace-separated fields
     */
    private static int writeIndexFile(FileSystem fileSystem, String jobOutputFile, String collectionPath, String indexFile, int numBlocks) throws IOException {
        int written = 0;
        FSLineReader reader = new FSLineReader(jobOutputFile, fileSystem);
        try {
            FSDataOutputStream out = fileSystem.create(new Path(indexFile), true);
            try {
                out.writeUTF(IndexableAnchorTextForwardIndex.class.getName());
                out.writeUTF(collectionPath);
                out.writeInt(numBlocks);

                Text line = new Text();
                while (reader.readLine(line) > 0) {
                    String[] fields = line.toString().split("\\s+");
                    if (fields.length < 3) {
                        throw new IOException("Malformed line in " + jobOutputFile + ": \"" + line + "\"");
                    }
                    int docno = Integer.parseInt(fields[0]);
                    int position = Integer.parseInt(fields[1]);
                    short fileno = Short.parseShort(fields[2]);
                    out.writeInt(docno);
                    out.writeInt(position);
                    out.writeShort(fileno);
                    written++;
                    if (written % 1000 == 0) {
                        LOG.info(written + " blocks written");
                    }
                }
            } finally {
                // Close even when parsing or writing fails so the stream is not leaked.
                out.close();
            }
        } finally {
            reader.close();
        }
        return written;
    }

    /**
     * Command-line entry point: hands the arguments to {@link ToolRunner} and
     * terminates the JVM with the status code the tool returns.
     *
     * @param strArr raw command-line arguments
     * @throws Exception if the tool fails
     */
    public static void main(String[] strArr) throws Exception {
        final Configuration conf = new Configuration();
        final Tool tool = new BuildAnchorTextForwardIndex();
        final int exitCode = ToolRunner.run(conf, tool, strArr);
        System.exit(exitCode);
    }
}
