package org.apache.tika.parser.wordperfect;

import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.EncryptedDocumentException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.UnsupportedFormatException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.WordPerfect;
import org.apache.tika.mime.MediaType;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.SAXException;

/* loaded from: input_file:org/apache/tika/parser/wordperfect/WP6TextExtractor.class */
/**
 * Extracts plain text from a WordPerfect 6.x style document stream and emits it
 * to an {@link XHTMLContentHandler} as a single {@code <p>} element.
 *
 * <p>The extractor first reads the file header, records its fields as metadata,
 * rejects unsupported or encrypted documents, and then walks the document area
 * byte by byte, translating the codes it understands into characters.
 */
class WP6TextExtractor {

    /** Number of buffered characters after which text is flushed to the handler. */
    private static final int FLUSH_THRESHOLD = 4096;

    WP6TextExtractor() {
    }

    /**
     * Parses the header of the given stream, populates {@code metadata} and, when the
     * version is supported and the document is not encrypted, writes the document text
     * to {@code xHTMLContentHandler}.
     *
     * @param inputStream the raw document stream
     * @param xHTMLContentHandler handler receiving the extracted text
     * @param metadata metadata object receiving the header fields (and, for rejected
     *        versions, the {@code Content-Type})
     * @throws UnsupportedFormatException if the header's major version is not 2
     * @throws EncryptedDocumentException if the header flags the document as encrypted
     * @throws IOException if reading the stream fails
     * @throws SAXException if the content handler fails
     */
    public void extract(InputStream inputStream, XHTMLContentHandler xHTMLContentHandler, Metadata metadata) throws IOException, SAXException, TikaException {
        WPInputStream in = new WPInputStream(inputStream);
        WP6FileHeader header = parseFileHeader(in);
        applyMetadata(header, metadata);

        int majorVersion = header.getMajorVersion();
        if (majorVersion == 0) {
            // Major version 0 is reported as WP 5.x; the minor version selects 5.0 or 5.1.
            MediaType mediaType = WordPerfectParser.WP_UNK;
            if (header.getMinorVersion() == 0) {
                mediaType = WordPerfectParser.WP_5_0;
            } else if (header.getMinorVersion() == 1) {
                mediaType = WordPerfectParser.WP_5_1;
            }
            metadata.set("Content-Type", mediaType.toString());
            throw new UnsupportedFormatException("Parser doesn't support this version:" + mediaType.toString());
        }
        if (majorVersion != 2) {
            metadata.set("Content-Type", WordPerfectParser.WP_UNK.toString());
            throw new UnsupportedFormatException("Parser doesn't recognize this version");
        }
        if (header.isEncrypted()) {
            throw new EncryptedDocumentException();
        }
        extractDocumentText(in, header.getDocAreaPointer(), xHTMLContentHandler);
    }

    /**
     * Copies the parsed header fields into the WordPerfect metadata properties.
     */
    private void applyMetadata(WP6FileHeader header, Metadata metadata) {
        metadata.set(WordPerfect.FILE_SIZE, Long.toString(header.getFileSize()));
        metadata.set(WordPerfect.FILE_ID, header.getFileId());
        metadata.set(WordPerfect.PRODUCT_TYPE, header.getProductType());
        metadata.set(WordPerfect.FILE_TYPE, header.getFileType());
        metadata.set(WordPerfect.MAJOR_VERSION, header.getMajorVersion());
        metadata.set(WordPerfect.MINOR_VERSION, header.getMinorVersion());
        metadata.set(WordPerfect.ENCRYPTED, Boolean.toString(header.isEncrypted()));
    }

    /**
     * Skips {@code offset} bytes from the current stream position and then converts
     * every following byte (until end of stream) into text inside one {@code <p>}.
     *
     * @param in the stream, positioned at the start of the file
     * @param offset number of bytes to skip before the document area starts
     * @param xhtml handler receiving the text
     */
    private void extractDocumentText(WPInputStream in, long offset, XHTMLContentHandler xhtml) throws IOException, SAXException {
        xhtml.startElement("p");

        // Use a long counter: the offset is a long, and an int counter would wrap
        // around (and never reach the bound) for offsets above Integer.MAX_VALUE.
        for (long skipped = 0; skipped < offset; skipped++) {
            in.readWPByte();
        }

        StringBuilder sb = new StringBuilder(FLUSH_THRESHOLD);
        while (true) {
            int code = in.read();
            if (code == -1) {
                // End of stream: flush what is left and close the paragraph.
                xhtml.characters(sb.toString());
                sb.setLength(0);
                xhtml.endElement("p");
                return;
            }

            if (code > 0 && code <= 32) {
                // Codes 1..32 map through the default extended international table.
                sb.append(WP6Constants.DEFAULT_EXTENDED_INTL_CHARS[code]);
            } else if (code >= 33 && code <= 126) {
                // Printable ASCII is emitted unchanged.
                sb.append((char) code);
            } else if (code == 128) {
                sb.append(' ');
            } else if (code == 129) {
                // Emitted as a non-breaking space (U+00A0).
                sb.append((char) 160);
            } else if (code == 132) {
                // Hard hyphen. The previous code tested 129 a second time here, which made
                // this branch unreachable; 0x84 is the hard-hyphen function code in the
                // WP6 single-byte function table.
                sb.append('-');
            } else if (code == 135 || code == 137) {
                sb.append('\n');
            } else if (code == 138) {
                // Skip everything up to and including the matching 139 code.
                skipUntilChar(in, 139);
            } else if (code == 198) {
                // Must be tested before the 180..207 range below, which would map it to '\n'.
                sb.append('\t');
            } else if (code >= 180 && code <= 207) {
                sb.append('\n');
            } else if (code >= 208 && code <= 239) {
                appendVariableLengthFunction(in, code, sb);
            } else if (code == 240) {
                appendExtendedCharacter(in, sb);
            } else if (code >= 241 && code <= 255) {
                // Skip the payload up to and including the repeated closing code.
                skipUntilChar(in, code);
            }

            if (sb.length() >= FLUSH_THRESHOLD) {
                xhtml.characters(sb.toString());
                sb.setLength(0);
            }
        }
    }

    /**
     * Consumes a function in the 208..239 range: one sub-code byte, a short length, and
     * {@code length - 4} further bytes, then appends the whitespace that stands in for
     * the function (if any) to {@code sb}.
     */
    private void appendVariableLengthFunction(WPInputStream in, int code, StringBuilder sb) throws IOException {
        int subGroup = in.readWP();
        int functionLength = in.readWPShort();
        // The remaining payload is not interpreted; it only has to be consumed.
        for (int i = 0; i < functionLength - 4; i++) {
            in.readWP();
        }

        if (code == 208) {
            if (subGroup >= 1 && subGroup <= 3) {
                sb.append(' ');
            } else if (subGroup == 10) {
                // Checked before the 4..19 range, which would otherwise map it to '\n'.
                sb.append('\t');
            } else if (subGroup >= 4 && subGroup <= 19) {
                sb.append('\n');
            } else if (subGroup >= 20 && subGroup <= 22) {
                sb.append(' ');
            } else if (subGroup >= 23 && subGroup <= 28) {
                sb.append('\n');
            }
        } else if (code == 213) {
            sb.append(' ');
        } else if (code == 224) {
            sb.append('\t');
        }
    }

    /**
     * Consumes the three bytes following a 240 code (character, charset, trailing byte)
     * and appends the mapped character for charsets 4 and 5, or a placeholder for any
     * other charset.
     */
    private void appendExtendedCharacter(WPInputStream in, StringBuilder sb) throws IOException {
        int character = in.readWP();
        int charset = in.readWP();
        in.readWP(); // trailing byte of the sequence, not used

        if (charset == 4 || charset == 5) {
            // The index comes straight from the file, so guard the table lookup instead of
            // letting a malformed document raise ArrayIndexOutOfBoundsException.
            if (charset < WP6Constants.EXTENDED_CHARSETS.length
                    && character >= 0
                    && character < WP6Constants.EXTENDED_CHARSETS[charset].length) {
                sb.append(WP6Constants.EXTENDED_CHARSETS[charset][character]);
            }
        } else {
            sb.append("[TODO:charset" + charset + "]");
        }
    }

    /**
     * Reads and discards bytes until {@code i} has been read or the stream ends.
     *
     * @param wPInputStream the stream to read from
     * @param i the byte value that terminates the skip (it is consumed as well)
     * @return the number of bytes consumed, including the terminator when found
     */
    private int skipUntilChar(WPInputStream wPInputStream, int i) throws IOException {
        int consumed = 0;
        int current;
        do {
            current = wPInputStream.read();
            if (current == -1) {
                return consumed;
            }
            consumed++;
        } while (current != i);
        return consumed;
    }

    /**
     * Reads the file header fields from the start of the stream and then resets the
     * stream to where it was, so the caller can still skip to the document area from
     * the beginning.
     */
    private WP6FileHeader parseFileHeader(WPInputStream wPInputStream) throws IOException {
        WP6FileHeader header = new WP6FileHeader();
        wPInputStream.mark(30);
        header.setFileId(wPInputStream.readWPString(4));
        header.setDocAreaPointer(wPInputStream.readWPLong());
        header.setProductType(wPInputStream.readWP());
        header.setFileType(wPInputStream.readWPChar());
        header.setMajorVersion(wPInputStream.readWP());
        header.setMinorVersion(wPInputStream.readWP());
        header.setEncrypted(wPInputStream.readWPShort() != 0);
        header.setIndexAreaPointer(wPInputStream.readWPShort());
        try {
            wPInputStream.skip(4L);
            header.setFileSize(wPInputStream.readWPLong());
        } catch (IOException ignored) {
            // Deliberately best-effort: if the file-size field cannot be read, the header
            // simply keeps its default file size and parsing continues.
            // NOTE(review): presumably this covers headers without the extended area - confirm.
        }
        wPInputStream.reset();
        return header;
    }
}
