/*
 * Decompiled with CFR 0.152.
 */
package org.opensearch.ml.engine.analysis;

import ai.djl.huggingface.tokenizers.HuggingFaceTokenizer;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.nio.file.attribute.FileAttribute;
import java.util.Map;
import java.util.Objects;
import lombok.Generated;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Tokenizer;
import org.opensearch.common.settings.Settings;
import org.opensearch.env.Environment;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.analysis.AbstractTokenizerFactory;
import org.opensearch.ml.engine.analysis.DJLUtils;
import org.opensearch.ml.engine.analysis.HFModelTokenizer;
import org.opensearch.ml.engine.utils.ZipUtils;

/**
 * Tokenizer factory producing {@link HFModelTokenizer} instances that are backed by
 * HuggingFace tokenizers shipped as zip archives on the classpath
 * ({@code /analysis/bert-uncased.zip} and {@code /analysis/mbert-uncased.zip}).
 *
 * <p>Each archive is extracted under the ML engine's analysis root directory and the
 * resulting tokenizer and token-weight table are cached in a per-tokenizer singleton
 * holder. The holders are only consulted through the suppliers handed to
 * {@link HFModelTokenizer}; when those suppliers are invoked is decided by that class.
 */
public class HFModelTokenizerFactory
extends AbstractTokenizerFactory {
    @Generated
    private static final Logger log = LogManager.getLogger(HFModelTokenizerFactory.class);
    /** Name of the default tokenizer; also used as extraction directory name and temp-file prefix. */
    public static final String DEFAULT_TOKENIZER_NAME = "bert-uncased";
    /** Name of the multilingual tokenizer; also used as extraction directory name and temp-file prefix. */
    public static final String DEFAULT_MULTILINGUAL_TOKENIZER_NAME = "mbert-uncased";

    /**
     * Creates a tokenizer backed by the {@value #DEFAULT_TOKENIZER_NAME} resources.
     *
     * <p>The suppliers dereference the holder returned by
     * {@link DefaultTokenizerHolder#getInstance()}; if the holder could not be initialized
     * that method returns {@code null} and invoking a supplier fails with a
     * {@link NullPointerException}.
     *
     * @return a new {@link HFModelTokenizer}
     */
    public static Tokenizer createDefault() {
        return new HFModelTokenizer(
            () -> DefaultTokenizerHolder.getInstance().tokenizer,
            () -> DefaultTokenizerHolder.getInstance().tokenWeights
        );
    }

    /**
     * Creates a tokenizer backed by the {@value #DEFAULT_MULTILINGUAL_TOKENIZER_NAME} resources.
     *
     * <p>Same failure behaviour as {@link #createDefault()}: a holder that failed to
     * initialize surfaces as a {@link NullPointerException} when a supplier is invoked.
     *
     * @return a new {@link HFModelTokenizer}
     */
    public static Tokenizer createDefaultMultilingual() {
        return new HFModelTokenizer(
            () -> DefaultMultilingualTokenizerHolder.getInstance().tokenizer,
            () -> DefaultMultilingualTokenizerHolder.getInstance().tokenWeights
        );
    }

    /**
     * Builds the factory.
     *
     * @param indexSettings index settings forwarded to the superclass
     * @param environment   node environment; not used by this factory
     * @param name          tokenizer name forwarded to the superclass
     * @param settings      tokenizer settings forwarded to the superclass
     */
    public HFModelTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
        super(indexSettings, settings, name);
    }

    /**
     * Returns a new default tokenizer; equivalent to {@link #createDefault()}.
     *
     * @return a new {@link HFModelTokenizer} using the default tokenizer resources
     */
    public Tokenizer create() {
        return HFModelTokenizerFactory.createDefault();
    }

    /** Singleton holder for the multilingual tokenizer resources. */
    private static class DefaultMultilingualTokenizerHolder
    extends BaseTokenizerHolder {
        private static final String RESOURCE_PATH = "/analysis/mbert-uncased.zip";
        private static volatile DefaultMultilingualTokenizerHolder INSTANCE;

        private DefaultMultilingualTokenizerHolder() {
            super(RESOURCE_PATH, HFModelTokenizerFactory.DEFAULT_MULTILINGUAL_TOKENIZER_NAME);
        }

        /**
         * Returns the shared holder, creating it on first use (double-checked locking on
         * the volatile {@code INSTANCE} field).
         *
         * @return the holder, or the previous value of {@code INSTANCE} (possibly
         *         {@code null}) when initialization fails; the failure is logged
         */
        public static DefaultMultilingualTokenizerHolder getInstance() {
            DefaultMultilingualTokenizerHolder current = INSTANCE;
            if (isReady(current)) {
                return current;
            }
            synchronized (DefaultMultilingualTokenizerHolder.class) {
                // Re-check under the lock: another thread may have finished initialization.
                current = INSTANCE;
                if (isReady(current)) {
                    return current;
                }
                try {
                    INSTANCE = new DefaultMultilingualTokenizerHolder();
                } catch (RuntimeException e) {
                    // Pass the throwable so the stack trace and cause chain reach the log.
                    log.error(e.getMessage(), e);
                }
                return INSTANCE;
            }
        }
    }

    /** Singleton holder for the default tokenizer resources. */
    private static class DefaultTokenizerHolder
    extends BaseTokenizerHolder {
        private static final String RESOURCE_PATH = "/analysis/bert-uncased.zip";
        private static volatile DefaultTokenizerHolder INSTANCE;

        private DefaultTokenizerHolder() {
            super(RESOURCE_PATH, HFModelTokenizerFactory.DEFAULT_TOKENIZER_NAME);
        }

        /**
         * Returns the shared holder, creating it on first use (double-checked locking on
         * the volatile {@code INSTANCE} field).
         *
         * @return the holder, or the previous value of {@code INSTANCE} (possibly
         *         {@code null}) when initialization fails; the failure is logged
         */
        public static DefaultTokenizerHolder getInstance() {
            DefaultTokenizerHolder current = INSTANCE;
            if (isReady(current)) {
                return current;
            }
            synchronized (DefaultTokenizerHolder.class) {
                // Re-check under the lock: another thread may have finished initialization.
                current = INSTANCE;
                if (isReady(current)) {
                    return current;
                }
                try {
                    INSTANCE = new DefaultTokenizerHolder();
                } catch (RuntimeException e) {
                    // Pass the throwable so the stack trace and cause chain reach the log.
                    log.error(e.getMessage(), e);
                }
                return INSTANCE;
            }
        }
    }

    /**
     * Extracts a bundled tokenizer archive and loads the tokenizer and its token weights.
     */
    private static abstract class BaseTokenizerHolder {
        private static final String ZIP_SUFFIX = ".zip";
        private static final String TOKENIZER_FILE_NAME = "tokenizer.json";
        private static final String TOKEN_WEIGHTS_FILE_NAME = "idf.json";
        final HuggingFaceTokenizer tokenizer;
        final Map<String, Float> tokenWeights;
        final String name;

        /**
         * Copies the classpath resource to a temporary zip in the analysis root, unzips it
         * into {@code <analysisRoot>/<name>}, then loads {@code tokenizer.json} and
         * {@code idf.json} from that directory.
         *
         * @param resourcePath classpath location of the zip archive
         * @param name         tokenizer name; used as temp-file prefix and target directory name
         * @throws RuntimeException if the resource is missing, extraction fails, or the
         *                          tokenizer / token weights cannot be loaded
         */
        BaseTokenizerHolder(String resourcePath, String name) {
            Path tokenizerDir;
            try (InputStream is = HFModelTokenizerFactory.class.getResourceAsStream(resourcePath)) {
                if (Objects.isNull(is)) {
                    throw new RuntimeException("Invalid resource path " + resourcePath);
                }
                Path analysisRoot = DJLUtils.getMlEngine().getAnalysisRootPath();
                tokenizerDir = analysisRoot.resolve(name);
                Files.createDirectories(analysisRoot);
                File tempZipFile = File.createTempFile(name, ZIP_SUFFIX, analysisRoot.toFile());
                try {
                    // createTempFile already created the file, hence REPLACE_EXISTING.
                    Files.copy(is, tempZipFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
                    ZipUtils.unzip(tempZipFile, tokenizerDir);
                } finally {
                    // The archive is only needed for extraction; do not leave it behind.
                    deleteQuietly(tempZipFile);
                }
            } catch (IOException e) {
                throw new RuntimeException("Failed to extract " + name + " analyzer zip file. " + e, e);
            }
            try {
                this.tokenizer = DJLUtils.buildHuggingFaceTokenizer(tokenizerDir.resolve(TOKENIZER_FILE_NAME));
                this.tokenWeights = DJLUtils.fetchTokenWeights(tokenizerDir.resolve(TOKEN_WEIGHTS_FILE_NAME));
                this.name = name;
            } catch (Exception e) {
                throw new RuntimeException("Failed to initialize tokenizer: " + name, e);
            }
        }

        /**
         * Tells whether a holder exists and carries both a tokenizer and token weights.
         *
         * @param holder holder to inspect; may be {@code null}
         * @return {@code true} when the holder and both of its resources are non-null
         */
        static boolean isReady(BaseTokenizerHolder holder) {
            return holder != null && holder.tokenizer != null && holder.tokenWeights != null;
        }

        /** Best-effort removal of a temporary file; a failure is logged, never propagated. */
        private static void deleteQuietly(File file) {
            try {
                Files.deleteIfExists(file.toPath());
            } catch (IOException | SecurityException e) {
                log.warn("Failed to delete temporary file {}", file, e);
            }
        }
    }
}

