package com.arcadedb.integration.importer.vector;

import com.arcadedb.database.Database;
import com.arcadedb.database.DatabaseFactory;
import com.arcadedb.database.DatabaseInternal;
import com.arcadedb.index.vector.HnswVectorIndexRAM;
import com.arcadedb.index.vector.VectorUtils;
import com.arcadedb.index.vector.distance.DistanceFunctionFactory;
import com.arcadedb.integration.importer.ConsoleLogger;
import com.arcadedb.integration.importer.ImporterContext;
import com.arcadedb.integration.importer.ImporterSettings;
import com.arcadedb.schema.Type;
import com.arcadedb.utility.CodeUtils;
import com.arcadedb.utility.DateUtils;
import com.github.jelmerk.knn.DistanceFunction;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/* loaded from: input_file:com/arcadedb/integration/importer/vector/TextEmbeddingsImporter.class */
/**
 * Imports text embeddings (one embedding per line: an identifier followed by space-separated
 * float values) from an input stream, builds an in-memory HNSW index over them and persists
 * that index into the target database.
 *
 * <p>Behaviour can be tuned through {@code ImporterSettings.options} using the keys
 * {@code distanceFunction}, {@code vectorType}, {@code vectorProperty}, {@code idProperty},
 * {@code deletedProperty}, {@code m}, {@code ef}, {@code efConstruction} and
 * {@code normalizeVectors}.
 *
 * <p>The progress counters are atomic, so {@link #printProgress()} may be called from another
 * thread while {@link #run()} is executing.
 */
public class TextEmbeddingsImporter {
    /** Initial value passed as the last argument of {@code CodeUtils.split} (presumably a size hint - TODO confirm). */
    private static final int INITIAL_SPLIT_HINT = 301;
    /** Batch size handed to the persistent index builder. */
    private static final int PERSIST_BATCH_SIZE = 1000;
    private static final String BANNER = "***************************************************************************************************";

    private final InputStream inputStream;
    private final ImporterSettings settings;
    private final ConsoleLogger logger;
    private int m = 16;
    private int ef = 256;
    private int efConstruction = 256;
    private boolean normalizeVectors = false;
    private String databasePath;
    private DatabaseFactory factory;
    private Database database;
    private long beginTime;
    private String vectorTypeName = "Float";
    private String distanceFunctionName = "InnerProduct";
    private String vectorPropertyName = "vector";
    private String idPropertyName = "name";
    private String deletedPropertyName = "deleted";
    private boolean overwriteDatabase = false;
    private long errors = 0;
    private long warnings = 0;
    private boolean error = false;
    private ImporterContext context = new ImporterContext();
    // Atomic rather than "volatile long" + "++": the indexing callback is invoked while the index is
    // built with several worker threads, and a volatile increment is not an atomic read-modify-write.
    private final AtomicLong embeddingsParsed = new AtomicLong();
    private final AtomicLong indexedEmbedding = new AtomicLong();
    private final AtomicLong verticesCreated = new AtomicLong();
    private final AtomicLong verticesConnected = new AtomicLong();

    /**
     * Creates an importer that reads embeddings from {@code inputStream} into {@code databaseInternal}.
     *
     * @param databaseInternal target database; its path is also remembered for log messages
     * @param inputStream      source of the embeddings, one per line
     * @param importerSettings importer configuration; {@code options} overrides the defaults of this class
     * @throws IllegalArgumentException if {@code distanceFunction} or {@code vectorType} is present but empty
     * @throws NumberFormatException    if {@code m}, {@code ef} or {@code efConstruction} is not an integer
     */
    public TextEmbeddingsImporter(DatabaseInternal databaseInternal, InputStream inputStream, ImporterSettings importerSettings) throws ClassNotFoundException {
        this.settings = importerSettings;
        this.database = databaseInternal;
        this.databasePath = databaseInternal.getDatabasePath();
        this.inputStream = inputStream;
        this.logger = new ConsoleLogger(importerSettings.verboseLevel);

        if (importerSettings.options.containsKey("distanceFunction")) {
            this.distanceFunctionName = capitalize("distanceFunction", importerSettings.options.get("distanceFunction"));
        }
        if (importerSettings.options.containsKey("vectorType")) {
            this.vectorTypeName = capitalize("vectorType", importerSettings.options.get("vectorType"));
        }
        if (importerSettings.options.containsKey("vectorProperty")) {
            this.vectorPropertyName = importerSettings.options.get("vectorProperty");
        }
        if (importerSettings.options.containsKey("idProperty")) {
            this.idPropertyName = importerSettings.options.get("idProperty");
        }
        if (importerSettings.options.containsKey("deletedProperty")) {
            this.deletedPropertyName = importerSettings.options.get("deletedProperty");
        }
        if (importerSettings.options.containsKey("m")) {
            this.m = Integer.parseInt(importerSettings.options.get("m"));
        }
        if (importerSettings.options.containsKey("ef")) {
            this.ef = Integer.parseInt(importerSettings.options.get("ef"));
        }
        if (importerSettings.options.containsKey("efConstruction")) {
            this.efConstruction = Integer.parseInt(importerSettings.options.get("efConstruction"));
        }
        if (importerSettings.options.containsKey("normalizeVectors")) {
            this.normalizeVectors = Boolean.parseBoolean(importerSettings.options.get("normalizeVectors"));
        }
    }

    /**
     * Loads the embeddings, indexes them in RAM, persists the index and logs a summary.
     *
     * @return the database the embeddings were imported into, or {@code null} if the database
     *         could not be prepared (see {@code createDatabase()})
     * @throws IllegalArgumentException if the configured vector type is not one of
     *                                  Short, Integer, Long, Float or Double
     */
    public Database run() throws IOException, ClassNotFoundException, InterruptedException {
        if (!createDatabase()) {
            return null;
        }
        // Raw types are kept from the original code: the generic signatures of the index API are not visible here.
        final DistanceFunction distanceFunction = DistanceFunctionFactory.getImplementationByName(this.vectorTypeName + this.distanceFunctionName);
        this.beginTime = System.currentTimeMillis();

        final List<TextFloatsEmbedding> embeddings = loadFromFile();
        skipLeadingEntries(embeddings);

        if (!embeddings.isEmpty()) {
            // The second entry is sampled for the dimensions, as before (presumably so that a header line
            // does not decide them - TODO confirm); fall back to the only entry when there is just one.
            final int dimensions = embeddings.get(embeddings.size() > 1 ? 1 : 0).dimensions();
            this.logger.logLine(2, "- Parsed %,d embeddings with %,d dimensions in RAM", embeddings.size(), dimensions);

            final HnswVectorIndexRAM ramIndex = HnswVectorIndexRAM.newBuilder(dimensions, distanceFunction, embeddings.size()).withM(this.m).withEf(this.ef).withEfConstruction(this.efConstruction).build();
            ramIndex.addAll(embeddings, Runtime.getRuntime().availableProcessors(), (done, total) -> {
                this.indexedEmbedding.incrementAndGet();
            }, 1);

            final Type vectorType = resolveVectorType();
            ramIndex.createPersistentIndex(this.database).withVertexType(this.settings.vertexTypeName).withEdgeType(this.settings.edgeTypeName).withVectorProperty(this.vectorPropertyName, vectorType).withIdProperty(this.idPropertyName).withDeletedProperty(this.deletedPropertyName).withVertexCreationCallback((vertex, item, total) -> {
                this.verticesCreated.incrementAndGet();
            }).withCallback((document, total) -> {
                this.verticesConnected.incrementAndGet();
            }).withBatchSize(PERSIST_BATCH_SIZE).create();
        }

        logSummary(embeddings.size());
        return this.database;
    }

    /**
     * Logs one progress line: an estimated percentage (indexing counts for 10%, vertex creation
     * for 30% and vertex connection for 60%) followed by every counter that is greater than zero.
     */
    public void printProgress() {
        // Read every counter once so the percentage and the printed values are consistent.
        final long parsed = this.embeddingsParsed.get();
        final long indexed = this.indexedEmbedding.get();
        final long created = this.verticesCreated.get();
        final long connected = this.verticesConnected.get();

        float percent = 0.0f;
        if (connected > 0) {
            percent = 40.0f + ((((float) connected) * 60.0f) / ((float) parsed));
        } else if (created > 0) {
            percent = 10.0f + ((((float) created) * 30.0f) / ((float) parsed));
        } else if (indexed > 0) {
            percent = (((float) indexed) * 10.0f) / ((float) parsed);
        }

        final StringBuilder message = new StringBuilder(String.format("- %.2f%%", percent));
        if (parsed > 0) {
            message.append(String.format(" - %,d embeddings parsed", parsed));
        }
        if (indexed > 0) {
            message.append(String.format(" - %,d embeddings indexed", indexed));
        }
        if (created > 0) {
            message.append(String.format(" - %,d vertices created", created));
        }
        if (connected > 0) {
            message.append(String.format(" - %,d vertices connected", connected));
        }
        message.append(" (elapsed ").append(DateUtils.formatElapsed(System.currentTimeMillis() - this.beginTime)).append(")");

        // The message already contains a literal '%', so it is passed as an argument, never as the format string.
        this.logger.logLine(2, "%s", message.toString());
    }

    /**
     * Makes sure {@code database} is available, creating it at {@code databasePath} when it is not set.
     *
     * @return {@code false} (after counting one error) when a database already exists on the path
     *         and overwriting is disabled, {@code true} otherwise
     */
    private boolean createDatabase() {
        if (this.database != null) {
            return true;
        }
        this.factory = new DatabaseFactory(this.databasePath);
        if (this.factory.exists()) {
            if (!this.overwriteDatabase) {
                this.logger.errorLine("Database already exists on path '%s'", this.databasePath);
                this.errors++;
                return false;
            }
            this.database = this.factory.open();
            this.logger.errorLine("Found existent database at '%s', dropping it and recreate a new one", this.databasePath);
            this.database.drop();
        }
        this.database = this.factory.create();
        return true;
    }

    /** Returns the error flag of this importer. */
    public boolean isError() {
        return this.error;
    }

    /** Returns the importer context currently attached to this importer. */
    public ImporterContext getContext() {
        return this.context;
    }

    /**
     * Replaces the importer context.
     *
     * @return this importer, to allow call chaining
     */
    public TextEmbeddingsImporter setContext(ImporterContext importerContext) {
        this.context = importerContext;
        return this;
    }

    /**
     * Reads the whole input stream, turning every line into one embedding. The reader (and thus
     * the underlying stream) is closed when the method returns or throws.
     *
     * @return the parsed embeddings in file order, at most {@code settings.parsingLimitEntries}
     *         of them when that setting is greater than zero
     */
    private List<TextFloatsEmbedding> loadFromFile() throws IOException {
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(this.inputStream, StandardCharsets.UTF_8))) {
            Stream<String> lines = reader.lines();
            if (this.settings.parsingLimitEntries > 0) {
                // limit() returns a NEW stream; the original one must not be used afterwards.
                lines = lines.limit(this.settings.parsingLimitEntries);
            }
            final AtomicInteger splitHint = new AtomicInteger(INITIAL_SPLIT_HINT);
            return lines.map(line -> parseLine(line, splitHint)).collect(Collectors.toList());
        }
    }

    /**
     * Parses one line made of an identifier followed by space-separated float values.
     *
     * @param line      the raw line
     * @param splitHint value handed to {@code CodeUtils.split}; updated with the token count
     *                  (minus the identifier) of this line for the next call
     */
    private TextFloatsEmbedding parseLine(String line, AtomicInteger splitHint) {
        this.embeddingsParsed.incrementAndGet();

        final List<String> tokens = CodeUtils.split(line, ' ', -1, splitHint.get());
        final String id = tokens.get(0);

        // A trailing separator yields empty trailing tokens: they are not values, so leave them out.
        int end = tokens.size();
        while (end > 1 && tokens.get(end - 1).isEmpty()) {
            end--;
        }

        // Token i (1-based after the identifier) is stored in slot i - 1, and the last token is included.
        float[] vector = new float[end - 1];
        for (int i = 1; i < end; i++) {
            vector[i - 1] = Float.parseFloat(tokens.get(i));
        }
        splitHint.set(tokens.size() - 1);

        if (this.normalizeVectors) {
            vector = VectorUtils.normalize(vector);
        }
        return new TextFloatsEmbedding(id, vector);
    }

    /** Drops the first {@code settings.documentsSkipEntries} embeddings, or all of them if fewer were loaded. */
    private void skipLeadingEntries(List<TextFloatsEmbedding> embeddings) {
        if (this.settings.documentsSkipEntries == null) {
            return;
        }
        final long requested = this.settings.documentsSkipEntries.longValue();
        if (requested <= 0) {
            return;
        }
        final int toDrop = (int) Math.min(requested, (long) embeddings.size());
        // One bulk removal instead of repeated remove(0), which shifts the whole list every time.
        embeddings.subList(0, toDrop).clear();
    }

    /** Maps the configured vector type name to the schema array type used for the vector property. */
    private Type resolveVectorType() {
        switch (this.vectorTypeName) {
            case "Short":
                return Type.ARRAY_OF_SHORTS;
            case "Integer":
                return Type.ARRAY_OF_INTEGERS;
            case "Long":
                return Type.ARRAY_OF_LONGS;
            case "Float":
                return Type.ARRAY_OF_FLOATS;
            case "Double":
                return Type.ARRAY_OF_DOUBLES;
            default:
                throw new IllegalArgumentException("Type '" + this.vectorTypeName + "' not supported");
        }
    }

    /** Logs the final report of the import. */
    private void logSummary(int embeddingCount) {
        this.logger.logLine(1, BANNER);
        this.logger.logLine(1, "Import of Text Embeddings database completed in %s with %,d errors and %,d warnings.", DateUtils.formatElapsed(System.currentTimeMillis() - this.beginTime), this.errors, this.warnings);
        this.logger.logLine(1, "\nSUMMARY\n");
        this.logger.logLine(1, "- Embeddings.................................: %,d", embeddingCount);
        this.logger.logLine(1, BANNER);
        this.logger.logLine(1, "");
        if (this.database != null) {
            this.logger.logLine(1, "NOTES:");
            // The path is an argument, not part of the format string, so a '%' in it cannot break formatting.
            this.logger.logLine(1, "- you can find your new ArcadeDB database in '%s'", this.database.getDatabasePath());
        }
    }

    /**
     * Upper-cases the first character and lower-cases the rest, e.g. {@code "fLOAT"} becomes {@code "Float"}.
     *
     * @throws IllegalArgumentException if {@code value} is {@code null} or empty
     */
    private static String capitalize(String optionName, String value) {
        if (value == null || value.isEmpty()) {
            throw new IllegalArgumentException("Option '" + optionName + "' cannot be empty");
        }
        return Character.toUpperCase(value.charAt(0)) + value.substring(1).toLowerCase(Locale.ENGLISH);
    }
}
