package org.apache.tika.parser.microsoft;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.io.IOUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;

/* loaded from: input_file:org/apache/tika/parser/microsoft/POIFSContainerDetector.class */
/**
 * Detector that inspects the top-level entry names of an OLE2 (POIFS)
 * container, and where needed the contents of its <code>\u0001CompObj</code>
 * stream, to pick a specific media type. Whenever no more specific type can
 * be determined for an OLE2 container, the generic {@link #OLE} type is
 * returned.
 */
public class POIFSContainerDetector implements Detector {
    private static final long serialVersionUID = -3028021741663605293L;
    public static final MediaType OLE = MediaType.application("x-tika-msoffice");
    public static final MediaType OOXML_PROTECTED = MediaType.application("x-tika-ooxml-protected");
    public static final MediaType GENERAL_EMBEDDED = MediaType.application("x-tika-msoffice-embedded");
    public static final MediaType OLE10_NATIVE = new MediaType(GENERAL_EMBEDDED, "format", "ole10_native");
    public static final MediaType COMP_OBJ = new MediaType(GENERAL_EMBEDDED, "format", "comp_obj");
    public static final MediaType MS_GRAPH_CHART = MediaType.application("vnd.ms-graph");
    public static final MediaType MS_EQUATION = MediaType.application("vnd.ms-equation");
    public static final MediaType XLS = MediaType.application("vnd.ms-excel");
    public static final MediaType DOC = MediaType.application("msword");
    public static final MediaType PPT = MediaType.application("vnd.ms-powerpoint");
    public static final MediaType PUB = MediaType.application("x-mspublisher");
    public static final MediaType VSD = MediaType.application("vnd.visio");
    public static final MediaType WPS = MediaType.application("vnd.ms-works");
    public static final MediaType XLR = MediaType.application("x-tika-msworks-spreadsheet");
    public static final MediaType MSG = MediaType.application("vnd.ms-outlook");
    public static final MediaType MPP = MediaType.application("vnd.ms-project");
    public static final MediaType SDC = MediaType.application("vnd.stardivision.calc");
    public static final MediaType SDA = MediaType.application("vnd.stardivision.draw");
    public static final MediaType SDD = MediaType.application("vnd.stardivision.impress");
    public static final MediaType SDW = MediaType.application("vnd.stardivision.writer");
    public static final MediaType SLDWORKS = MediaType.application("sldworks");
    public static final MediaType HWP = MediaType.application("x-hwp-v5");
    public static final MediaType QUATTROPRO = MediaType.application("x-quattro-pro");

    /** ASCII bytes of "StarImpress", searched for in the CompObj stream. */
    private static final byte[] STAR_IMPRESS = {83, 116, 97, 114, 73, 109, 112, 114, 101, 115, 115};
    /** ASCII bytes of "StarDraw", searched for in the CompObj stream. */
    private static final byte[] STAR_DRAW = {83, 116, 97, 114, 68, 114, 97, 119};
    /** ASCII bytes of "Quill96", searched for in the CompObj stream. */
    private static final byte[] WORKS_QUILL96 = {81, 117, 105, 108, 108, 57, 54};
    /** ASCII bytes of "MSGraph.Chart", searched for in the CompObj stream. */
    private static final byte[] MS_GRAPH_CHART_BYTES = {77, 83, 71, 114, 97, 112, 104, 46, 67, 104, 97, 114, 116};
    /** Entry-name pattern (three whitespace characters, then digits) that marks a project file. */
    private static final Pattern mppDataMatch = Pattern.compile("\\s\\s\\s\\d+");
    /** The eight leading bytes a stream must have before it is treated as an OLE2 container. */
    private static final int[] OLE2_SIGNATURE = {0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1};

    /**
     * Detects the media type from top-level entry names only.
     *
     * @param set top-level entry names of the container, may be {@code null}
     * @return the detected type, or {@link #OLE} if nothing more specific matches
     */
    protected static MediaType detect(Set<String> set) {
        return detect(set, (DirectoryEntry) null);
    }

    /**
     * Detects the media type from top-level entry names and, when a root
     * directory is supplied, from the contents of its CompObj stream.
     * The checks are order-sensitive: the first matching rule wins.
     *
     * @param set            top-level entry names of the container, may be {@code null}
     * @param directoryEntry root directory used for CompObj inspection, may be {@code null}
     * @return the detected type, or {@link #OLE} if nothing more specific matches
     */
    public static MediaType detect(Set<String> set, DirectoryEntry directoryEntry) {
        if (set == null) {
            return OLE;
        }
        if (set.contains("SwDocContentMgr") && set.contains("SwDocMgrTempStorage")) {
            return SLDWORKS;
        }
        if (set.contains("StarCalcDocument")) {
            return SDC;
        }
        if (set.contains("StarWriterDocument")) {
            return SDW;
        }
        if (set.contains("StarDrawDocument3")) {
            // Draw and Impress share this entry name; only CompObj tells them
            // apart. Without a root directory the helper yields OLE.
            return processCompObjFormatType(directoryEntry);
        }
        if (set.contains("\u0005HwpSummaryInformation")) {
            return HWP;
        }
        if (set.contains("WksSSWorkBook")) {
            return XLR;
        }
        if (set.contains("Workbook") || set.contains("WORKBOOK")) {
            // A workbook stream is treated as a chart only when CompObj says so.
            return MS_GRAPH_CHART.equals(processCompObjFormatType(directoryEntry)) ? MS_GRAPH_CHART : XLS;
        }
        if (set.contains("Book")) {
            return XLS;
        }
        if (set.contains("EncryptedPackage")) {
            boolean ooxmlProtected = set.contains("EncryptionInfo") && set.contains("\u0006DataSpaces");
            return ooxmlProtected ? OOXML_PROTECTED : OLE;
        }
        if (set.contains("WordDocument")) {
            return DOC;
        }
        if (set.contains("Quill")) {
            return PUB;
        }
        if (set.contains("PowerPoint Document")) {
            return PPT;
        }
        if (set.contains("VisioDocument")) {
            return VSD;
        }
        if (set.contains("\u0001Ole10Native")) {
            return OLE10_NATIVE;
        }
        if (set.contains("MatOST")) {
            return WPS;
        }
        if (set.contains("CONTENTS") && set.contains("SPELLING")) {
            return WPS;
        }
        if (set.contains("Contents") && set.contains("\u0003ObjInfo")) {
            return COMP_OBJ;
        }
        if (set.contains("CONTENTS")) {
            if (!set.contains("\u0001CompObj")) {
                return OLE;
            }
            return WPS.equals(processCompObjFormatType(directoryEntry)) ? WPS : COMP_OBJ;
        }

        // The three groups below are mutually exclusive: once a group's
        // marker entries are present, later groups are not consulted even if
        // the group itself produces no match.
        boolean hasProps = set.contains("Props") || set.contains("Props9") || set.contains("Props12");
        if (set.contains("\u0001CompObj") && hasProps) {
            for (String name : set) {
                if (mppDataMatch.matcher(name).matches()) {
                    return MPP;
                }
            }
            return OLE;
        }
        if (set.contains("PerfectOffice_MAIN")) {
            if (set.contains("SlideShow")) {
                return MediaType.application("x-corelpresentations");
            }
            if (set.contains("PerfectOffice_OBJECTS")) {
                return new MediaType(QUATTROPRO, "version", "7-8");
            }
            return OLE;
        }
        if (set.contains("NativeContent_MAIN")) {
            return new MediaType(QUATTROPRO, "version", "9");
        }
        if (set.contains("Equation Native")) {
            return MS_EQUATION;
        }
        for (String name : set) {
            if (name.startsWith("__substg1.0_")) {
                return MSG;
            }
        }
        return OLE;
    }

    /**
     * Reads the <code>\u0001CompObj</code> stream of the given directory and
     * maps known marker strings found in it to a media type.
     *
     * @param directoryEntry directory to inspect, may be {@code null}
     * @return {@link #MS_GRAPH_CHART}, {@link #SDA}, {@link #SDD} or {@link #WPS}
     *         when the matching marker is found; {@link #OLE} when the directory
     *         is {@code null}, the stream is missing or unreadable, or no marker matches
     */
    private static MediaType processCompObjFormatType(DirectoryEntry directoryEntry) {
        if (directoryEntry == null) {
            return OLE;
        }
        try {
            Entry entry = directoryEntry.getEntry("\u0001CompObj");
            if (entry != null && entry.isDocumentEntry()) {
                byte[] contents;
                DocumentInputStream stream = new DocumentInputStream((DocumentNode) entry);
                try {
                    contents = IOUtils.toByteArray(stream);
                } finally {
                    stream.close();
                }
                if (arrayContains(contents, MS_GRAPH_CHART_BYTES)) {
                    return MS_GRAPH_CHART;
                }
                if (arrayContains(contents, STAR_DRAW)) {
                    return SDA;
                }
                if (arrayContains(contents, STAR_IMPRESS)) {
                    return SDD;
                }
                if (arrayContains(contents, WORKS_QUILL96)) {
                    return WPS;
                }
            }
        } catch (IOException ignored) {
            // Best effort: a missing or unreadable CompObj stream simply means
            // no refinement is possible, so fall through to the generic type.
        } catch (RuntimeException ignored) {
            // Best effort: a malformed container must not break detection.
        }
        return OLE;
    }

    /**
     * Naive sub-array search.
     *
     * @param bArr  the bytes to search in
     * @param bArr2 the byte sequence to look for; callers pass non-empty constants
     * @return {@code true} if {@code bArr2} occurs contiguously within {@code bArr}
     */
    private static boolean arrayContains(byte[] bArr, byte[] bArr2) {
        int haystackPos = 0;
        int needlePos = 0;
        while (haystackPos < bArr.length) {
            if (bArr[haystackPos] == bArr2[needlePos]) {
                haystackPos++;
                needlePos++;
                if (needlePos == bArr2.length) {
                    return true;
                }
            } else {
                // Mismatch: restart one byte after where this attempt began.
                haystackPos = (haystackPos - needlePos) + 1;
                needlePos = 0;
            }
        }
        return false;
    }

    /**
     * Opens the stream's backing file as a POIFS file system, registers it as
     * the stream's open container, and returns its top-level entry names.
     *
     * @param tikaInputStream the stream whose backing file is opened
     * @return the top-level entry names, or an empty set if the file could not
     *         be opened or read as a POIFS container
     * @throws IOException declared for callers; failures while opening are
     *         reported as an empty set instead
     */
    private static Set<String> getTopLevelNames(TikaInputStream tikaInputStream) throws IOException {
        try {
            NPOIFSFileSystem fileSystem = new NPOIFSFileSystem(tikaInputStream.getFile(), true);
            // Hand the file system to the stream so later consumers can reuse it.
            tikaInputStream.setOpenContainer(fileSystem);
            return getTopLevelNames(fileSystem.getRoot());
        } catch (IOException e) {
            return Collections.emptySet();
        } catch (RuntimeException e) {
            return Collections.emptySet();
        }
    }

    /**
     * Collects the names of the entries directly under the given directory.
     *
     * @param directoryNode the directory to list
     * @return a new mutable set of entry names
     */
    private static Set<String> getTopLevelNames(DirectoryNode directoryNode) {
        Set<String> names = new HashSet<String>();
        for (Entry entry : directoryNode) {
            names.add(entry.getName());
        }
        return names;
    }

    /**
     * Checks whether the stream starts with {@link #OLE2_SIGNATURE}. The
     * stream is marked before reading and always reset afterwards, so its
     * position is left unchanged.
     *
     * @param inputStream the stream to peek at
     * @return {@code true} if the first eight bytes match the signature
     * @throws IOException if reading or resetting the stream fails
     */
    private static boolean startsWithOle2Signature(InputStream inputStream) throws IOException {
        inputStream.mark(OLE2_SIGNATURE.length);
        try {
            for (int expected : OLE2_SIGNATURE) {
                if (inputStream.read() != expected) {
                    return false;
                }
            }
            return true;
        } finally {
            inputStream.reset();
        }
    }

    /**
     * Detects the media type of the given stream.
     * <p>
     * If the stream is a {@link TikaInputStream} that already carries an open
     * POIFS container, its entry names are used directly. Otherwise the
     * stream must start with the OLE2 signature; if it does not,
     * {@link MediaType#OCTET_STREAM} is returned. Entry names can only be
     * read when the stream is a {@link TikaInputStream}; for other streams
     * with a matching signature the result is {@link #OLE}.
     *
     * @param inputStream the stream to inspect, may be {@code null}
     * @param metadata    not used by this detector
     * @return the detected media type
     * @throws IOException if reading from or resetting the stream fails
     */
    @Override
    public MediaType detect(InputStream inputStream, Metadata metadata) throws IOException {
        if (inputStream == null) {
            return MediaType.OCTET_STREAM;
        }

        TikaInputStream tikaStream = TikaInputStream.cast(inputStream);
        Set<String> names = null;
        if (tikaStream != null) {
            Object container = tikaStream.getOpenContainer();
            if (container instanceof NPOIFSFileSystem) {
                names = getTopLevelNames(((NPOIFSFileSystem) container).getRoot());
            } else if (container instanceof DirectoryNode) {
                names = getTopLevelNames((DirectoryNode) container);
            }
        }

        if (names == null) {
            if (!startsWithOle2Signature(inputStream)) {
                return MediaType.OCTET_STREAM;
            }
            if (tikaStream != null) {
                names = getTopLevelNames(tikaStream);
            }
        }

        // CompObj inspection is only possible when a whole file system is open.
        DirectoryEntry root = null;
        if (tikaStream != null && tikaStream.getOpenContainer() instanceof NPOIFSFileSystem) {
            root = ((NPOIFSFileSystem) tikaStream.getOpenContainer()).getRoot();
        }
        return detect(names, root);
    }
}
