package org.carrot2.text.preprocessing;

import com.carrotsearch.hppc.BitSet;
import com.carrotsearch.hppc.IntArrayList;
import java.util.ArrayList;
import org.carrot2.attrs.AttrBoolean;
import org.carrot2.attrs.AttrComposite;
import org.carrot2.attrs.AttrInteger;
import org.carrot2.language.TokenTypeUtils;

/* loaded from: input_file:org/carrot2/text/preprocessing/DocumentAssigner.class */
/**
 * Computes, for every label listed in a {@link PreprocessingContext}, the set of documents the
 * label is assigned to, and optionally discards labels assigned to too few documents.
 *
 * <p>Results are written back to {@code allLabels.documentIndices} (and, when filtering is
 * active, {@code allLabels.featureIndex}) of the context passed to
 * {@link #assign(PreprocessingContext)}.
 */
public class DocumentAssigner extends AttrComposite {
    /**
     * Controls how phrase labels are assigned to documents. When {@code true}, the phrase's own
     * per-document array ({@code allPhrases.tfByDocument}) is used. When {@code false} (the
     * default), a phrase is assigned to the documents shared by all of its non-common words,
     * looked up through their stems.
     */
    public AttrBoolean exactPhraseAssignment = this.attributes.register("exactPhraseAssignment", AttrBoolean.builder().label2("Exact phrase assignment").defaultValue(false));

    /**
     * Minimum number of assigned documents a label must have to be kept. Range 1..100,
     * default 2. A value of 1 or less disables filtering.
     */
    public AttrInteger minClusterSize = this.attributes.register("minClusterSize", AttrInteger.builder().label2("Minimum cluster size").min(1).max(100).defaultValue(2));

    /**
     * Assigns documents to every label in {@code preprocessingContext.allLabels.featureIndex}.
     *
     * <p>A feature index smaller than the number of words denotes a single-word label; any other
     * value denotes a phrase whose index is the feature index minus the number of words.
     *
     * <p>If {@link #minClusterSize} is greater than 1, labels whose document set is smaller than
     * that value are removed: both {@code allLabels.documentIndices} and
     * {@code allLabels.featureIndex} are replaced with the filtered arrays and
     * {@code LabelFilterProcessor.updateFirstPhraseIndex} is invoked. Otherwise only
     * {@code allLabels.documentIndices} is written.
     *
     * <p>NOTE: the decompiler reported that this method was originally package-private; it is
     * kept {@code public} here so that existing callers of this file keep compiling.
     *
     * @param preprocessingContext context providing word, stem, phrase and label data; its
     *     {@code allLabels} fields are updated in place
     */
    public void assign(PreprocessingContext preprocessingContext) {
        final int[] labelFeatureIndex = preprocessingContext.allLabels.featureIndex;
        final int[][] stemTfByDocument = preprocessingContext.allStems.tfByDocument;
        final int[] wordStemIndex = preprocessingContext.allWords.stemIndex;
        final short[] wordTypes = preprocessingContext.allWords.type;
        final int[][] phraseTfByDocument = preprocessingContext.allPhrases.tfByDocument;
        final int[][] phraseWordIndices = preprocessingContext.allPhrases.wordIndices;
        final int wordCount = wordStemIndex.length;
        final int documentCount = preprocessingContext.documentCount;

        final BitSet[] labelDocuments = new BitSet[labelFeatureIndex.length];
        for (int label = 0; label < labelFeatureIndex.length; label++) {
            final int featureIndex = labelFeatureIndex[label];
            final BitSet documents;
            if (featureIndex < wordCount) {
                // Single-word label: documents come from the word's stem.
                documents = new BitSet(documentCount);
                addTfByDocumentToBitSet(documents, stemTfByDocument[wordStemIndex[featureIndex]]);
            } else {
                final int phraseIndex = featureIndex - wordCount;
                if (this.exactPhraseAssignment.get().booleanValue()) {
                    documents = new BitSet(documentCount);
                    addTfByDocumentToBitSet(documents, phraseTfByDocument[phraseIndex]);
                } else {
                    documents = documentsSharedByNonCommonWords(
                        phraseWordIndices[phraseIndex],
                        wordTypes,
                        wordStemIndex,
                        stemTfByDocument,
                        documentCount);
                }
            }
            labelDocuments[label] = documents;
        }

        final int minSize = this.minClusterSize.get().intValue();
        if (minSize <= 1) {
            // No filtering requested: publish the document sets for all labels as they are.
            preprocessingContext.allLabels.documentIndices = labelDocuments;
            return;
        }

        // Keep only labels with enough documents; feature indices and document sets are
        // filtered together so they stay aligned position by position.
        final IntArrayList keptFeatureIndex = new IntArrayList(labelFeatureIndex.length);
        final ArrayList<BitSet> keptDocuments = new ArrayList<>(labelFeatureIndex.length);
        for (int label = 0; label < labelFeatureIndex.length; label++) {
            if (labelDocuments[label].cardinality() >= minSize) {
                keptFeatureIndex.add(labelFeatureIndex[label]);
                keptDocuments.add(labelDocuments[label]);
            }
        }
        preprocessingContext.allLabels.documentIndices = keptDocuments.toArray(new BitSet[0]);
        preprocessingContext.allLabels.featureIndex = keptFeatureIndex.toArray();
        LabelFilterProcessor.updateFirstPhraseIndex(preprocessingContext);
    }

    /**
     * Builds the intersection of the document sets of all non-common words of a phrase.
     *
     * <p>Words for which {@code TokenTypeUtils.isCommon} returns {@code true} are ignored. If
     * every word of the phrase is ignored, the returned set is empty.
     *
     * @param phraseWords indices of the words making up the phrase
     * @param wordTypes type flags indexed by word index
     * @param wordStemIndex stem index for each word index
     * @param stemTfByDocument per-stem arrays as consumed by {@link #addTfByDocumentToBitSet}
     * @param documentCount number of bits to size the bit sets for
     * @return a new bit set with the documents shared by all non-common words of the phrase
     */
    private static BitSet documentsSharedByNonCommonWords(
            int[] phraseWords,
            short[] wordTypes,
            int[] wordStemIndex,
            int[][] stemTfByDocument,
            int documentCount) {
        final BitSet shared = new BitSet(documentCount);
        boolean seeded = false;
        for (int wordIndex : phraseWords) {
            if (TokenTypeUtils.isCommon(wordTypes[wordIndex])) {
                continue;
            }
            final int[] tfByDocument = stemTfByDocument[wordStemIndex[wordIndex]];
            if (seeded) {
                final BitSet wordDocuments = new BitSet(documentCount);
                addTfByDocumentToBitSet(wordDocuments, tfByDocument);
                shared.and(wordDocuments);
            } else {
                // The first non-common word seeds the set; later words can only narrow it.
                addTfByDocumentToBitSet(shared, tfByDocument);
                seeded = true;
            }
        }
        return shared;
    }

    /**
     * Sets in {@code bitSet} the bit for every value stored at an even position of {@code iArr}.
     *
     * <p>The array is read as consecutive pairs and only the first element of each pair is used;
     * presumably the pairs are (document index, term frequency), as the name suggests. A trailing
     * unpaired element is ignored.
     *
     * @param bitSet bit set to update in place
     * @param iArr flat array of pairs whose first elements are used as bit indices
     */
    private static void addTfByDocumentToBitSet(BitSet bitSet, int[] iArr) {
        final int pairCount = iArr.length / 2;
        for (int pair = 0; pair < pairCount; pair++) {
            bitSet.set(iArr[pair * 2]);
        }
    }
}
