package com.bringspring.system.msgcenter.util;

import cn.hutool.core.collection.CollectionUtil;
import cn.hutool.core.date.TimeInterval;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.dfa.FoundWord;
import cn.hutool.dfa.SensitiveProcessor;
import cn.hutool.dfa.WordTree;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONException;
import com.bringspring.common.config.ConfigValueUtil;
import com.bringspring.common.util.DateUtil;
import com.bringspring.common.util.StringUtils;
import com.bringspring.system.base.entity.SysConfigEntity;
import com.bringspring.system.base.exception.BaseException;
import com.bringspring.system.base.service.SysConfigService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.stereotype.Component;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.Executor;

import static com.bringspring.system.msgcenter.constant.Constants.CacheKeys;

/**
 * Message-center sensitive-word utility.
 *
 * <p>On application start-up the word list is assembled from two sources (a text file under the
 * configured system file path and a JSON array stored in the system configuration) and loaded
 * into a shared {@link WordTree}. The static helpers then match or filter text against that tree.
 *
 * <p>The tree is never modified after it has been published: every (re)initialisation builds a
 * new tree and swaps the shared reference, so concurrent readers never observe a cleared or
 * partially populated tree.
 */
@Component
@Slf4j
public class SensitiveWordUtil implements ApplicationRunner {

    /**
     * File name of the default word list, resolved under the configured system file path.
     */
    private static final String DEFAULT_WORD_FILE = "sensitive_word.txt";

    @Autowired
    private ConfigValueUtil configValueUtil;
    @Autowired
    private SysConfigService sysConfigService;
    /**
     * Executor used to build the word tree asynchronously.
     */
    @Autowired
    private Executor threadPoolExecutor;

    /**
     * Currently published word tree. Replaced as a whole by {@link #init(Collection)};
     * {@code volatile} so that readers on other threads see the newly published tree.
     */
    private static volatile WordTree sensitiveTree = new WordTree();


    /**
     * Collects the default and the configured sensitive words and schedules the tree build.
     *
     * @param args application arguments (unused)
     */
    @Override
    public void run(ApplicationArguments args) {
        Set<String> sensitiveWords = new HashSet<>();
        loadDefaultWords(sensitiveWords);
        loadConfiguredWords(sensitiveWords);

        // The build is handed to the executor, so the tree may become visible slightly after this log line.
        this.init(sensitiveWords, true);
        log.info("消息中心-初始化敏感词库完毕, 共{}个敏感词", sensitiveWords.size());
    }

    /**
     * Reads the default word file (one word per line, decoded as UTF-8) into {@code target}.
     * A missing or unreadable file is logged and otherwise ignored.
     */
    private void loadDefaultWords(Set<String> target) {
        String systemFilePath = configValueUtil.getSystemFilePath();
        File file = FileUtil.file(systemFilePath + File.separator + DEFAULT_WORD_FILE);
        // Explicit charset: FileReader would fall back to the platform default encoding.
        try (BufferedReader reader = Files.newBufferedReader(file.toPath(), StandardCharsets.UTF_8)) {
            String line;
            while ((line = reader.readLine()) != null) {
                addIfNotBlank(target, line);
            }
        } catch (IOException e) {
            log.error("读取敏感词文件失败", e);
        }
    }

    /**
     * Reads the configured words (a JSON array stored under {@code CacheKeys.SENSITIVE_WORDS})
     * into {@code target}. Lookup or parse failures are logged and otherwise ignored.
     */
    private void loadConfiguredWords(Set<String> target) {
        try {
            SysConfigEntity mcSensitive = sysConfigService.getConfigByKeyName(CacheKeys.SENSITIVE_WORDS);
            if (ObjectUtil.isEmpty(mcSensitive)) {
                return;
            }
            String keyValue = mcSensitive.getKeyValue();
            if (StringUtils.isEmpty(keyValue)) {
                return;
            }
            JSONArray configured = JSONArray.parseArray(keyValue);
            if (configured == null) {
                return;
            }
            for (Object item : configured) {
                // JSON null elements carry no word and would otherwise fail on toString().
                if (item != null) {
                    addIfNotBlank(target, item.toString());
                }
            }
        } catch (BaseException | JSONException e) {
            log.error("读取配置敏感词失败", e);
        }
    }

    /**
     * Adds the trimmed word to {@code target}, skipping entries that are empty after trimming.
     */
    private static void addIfNotBlank(Set<String> target, String rawWord) {
        String word = rawWord.trim();
        if (!word.isEmpty()) {
            target.add(word);
        }
    }

    /**
     * Initialises the sensitive-word tree.
     *
     * @param sensitiveWords sensitive words; {@code null} is treated as an empty list
     * @param isAsync        whether to build the tree on the injected executor
     */
    public void init(final Collection<String> sensitiveWords, boolean isAsync) {
        if (isAsync) {
            // Snapshot the words so later changes by the caller cannot race with the background build.
            final List<String> snapshot = sensitiveWords == null
                    ? Collections.<String>emptyList()
                    : new ArrayList<>(sensitiveWords);
            threadPoolExecutor.execute(() -> init(snapshot));
        } else {
            init(sensitiveWords);
        }
    }

    /**
     * Initialises the sensitive-word tree, replacing any previously loaded words.
     *
     * @param sensitiveWords sensitive words; {@code null} is treated as an empty list
     */
    public void init(Collection<String> sensitiveWords) {
        // Build off to the side and publish with a single reference write, instead of
        // clearing and refilling the tree that matchers may be reading right now.
        WordTree rebuilt = new WordTree();
        if (sensitiveWords != null) {
            rebuilt.addWords(sensitiveWords);
        }
        sensitiveTree = rebuilt;
    }

    /**
     * Finds the first sensitive word in the text.
     *
     * @param text text to scan
     * @return the result of {@code WordTree.matchWord(text)} on the current tree
     * @since 5.5.3
     */
    public static FoundWord getFoundFirstSensitive(String text) {
        return sensitiveTree.matchWord(text);
    }

    /**
     * Finds all sensitive words in the text.
     *
     * @param text text to scan
     * @return the result of {@code WordTree.matchAllWords(text)} on the current tree
     */
    public static List<FoundWord> getFoundAllSensitive(String text) {
        return sensitiveTree.matchAllWords(text);
    }

    /**
     * Finds all sensitive words in the text.<br>
     * Density matching: with keywords ab,b and text abab the matches are [ab,b,ab].<br>
     * Greedy (longest) matching: with keywords a,ab the matches are [a, ab].
     *
     * @param text           text to scan
     * @param isDensityMatch whether to use density matching
     * @param isGreedMatch   whether to use greedy (longest) matching
     * @return the matches reported by the current tree, without a limit on their number
     */
    public static List<FoundWord> getFoundAllSensitive(String text, boolean isDensityMatch, boolean isGreedMatch) {
        return sensitiveTree.matchAllWords(text, -1, isDensityMatch, isGreedMatch);
    }

    /**
     * Filters the sensitive words in the text using the default processor
     * (documented by the original author as replacing matches with {@code *}).
     *
     * @param text         text to filter
     * @param isGreedMatch whether to use greedy (longest) matching
     * @return the filtered text
     */
    public static String sensitiveFilter(String text, boolean isGreedMatch) {
        return sensitiveFilter(text, isGreedMatch, null);
    }

    /**
     * Filters the sensitive words in the text.
     *
     * <p>Matches are applied from left to right. A match that starts before the end of the
     * previously replaced match is skipped, and when several matches start at the same index
     * the last one reported by the tree is used.
     *
     * @param text               text to filter; returned unchanged when empty or when nothing matches
     * @param isGreedMatch       whether to use greedy (longest) matching
     * @param sensitiveProcessor processor producing the replacement for each match; when {@code null}
     *                           the default {@link SensitiveProcessor} behaviour is used (documented by
     *                           the original author as one {@code *} per matched character)
     * @return the filtered text
     */
    public static String sensitiveFilter(String text, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
        if (StringUtils.isEmpty(text)) {
            return text;
        }
        TimeInterval timer = DateUtil.timer();
        // Density matching is not needed when filtering.
        List<FoundWord> foundWordList = getFoundAllSensitive(text, false, isGreedMatch);
        if (CollectionUtil.isEmpty(foundWordList)) {
            return text;
        }

        SensitiveProcessor processor = sensitiveProcessor == null ? new SensitiveProcessor() {} : sensitiveProcessor;
        // Index the matches by start position so the next applicable one can be looked up in order.
        TreeMap<Integer, FoundWord> matchesByStart = new TreeMap<>();
        for (FoundWord found : foundWordList) {
            matchesByStart.put(found.getStartIndex(), found);
        }

        int length = text.length();
        StringBuilder filtered = new StringBuilder(length);
        int currentIndex = 0;
        while (currentIndex < length) {
            Map.Entry<Integer, FoundWord> next = matchesByStart.ceilingEntry(currentIndex);
            if (next == null) {
                // No further match: copy the remaining text verbatim.
                filtered.append(text, currentIndex, length);
                break;
            }

            FoundWord fw = next.getValue();
            // Copy the clean text in front of the match, then the processed match itself.
            filtered.append(text, currentIndex, fw.getStartIndex());
            filtered.append(processor.process(fw));
            // The end index is inclusive, so continue with the character after it.
            currentIndex = fw.getEndIndex() + 1;
        }

        log.info("过滤敏感词, 耗时: {}ms", timer.intervalMs());
        return filtered.toString();
    }

}
