import os
import sys
import re
from loguru import logger
import base64
#from config.config import PROMPT
from openai import OpenAI
import time
import requests
import json
from datetime import datetime
import cv2
import ast
from config.config import LAYOUT_CHECK_URL,FORMULA_DETECTION_URL,LAYOUT_CHECK_URL_19
from config.config import Official_API_KEY
from config.config import Official_OPENAI_URL,ENV
from config.config import QWEN_URL,QWEN_API_KEY

from utils.common import get_millisecond_time
from tasks.mysql_utils import DBUtils    
#class Formula_Checker(metaclass=Singleton):
class Formula_Checker():
    def __init__(self):
        """Create a checker; no instance attributes are set up."""

    def perform_re_check(self,ocr_result):
        """
        Heuristically decide whether an OCR result contains a formula.

        Args:
            ocr_result (dict): OCR response. The success shape read here is
                ``{'errorCode': 0, 'msg': str, 'data': list[str]}``. The
                ``{'error': ...}`` dict that ``perform_ocr`` returns for a
                failed request is also accepted and treated as a failure.

        Returns:
            bool: True when the joined OCR text matches the formula pattern;
            False when it does not, or when the OCR result reports a failure.
        """
        # Alternatives of the pattern, in order. The pattern text is unchanged.
        formula_pattern = re.compile(
            r"([A-Za-z]+\s*=\s*[A-Za-z0-9+\-*/^()]+)|"  # name = expression
            r"(\b√[A-Za-z0-9]+\b)|"                     # square-root sign followed by an operand
            r"(\bΔ\b)|"                                 # standalone Δ
            r"(\([A-Za-z0-9+\-*/^()]+\)\s*[+\-*/^]\s*\([A-Za-z0-9+\-*/^()]+\))|"  # (expr) op (expr)
            r"([A-Za-z]*\d*[(x)(y)(z)(a)(b)(c)]*\s*[+\-*/^]+\s*\(?[A-Za-z0-9+\-*/^()]+\)?)"  # operand op operand
        )

        # A failed request carries no 'errorCode' key at all (only 'error'), so a
        # missing code is treated like a non-zero one instead of raising KeyError.
        if ocr_result.get('errorCode') != 0:
            error_msg = ocr_result.get('msg', ocr_result.get('error'))
            logger.error(f"OCR识别失败，错误信息: {error_msg}")
            return False

        # Merge the recognised text fragments into one string; a missing or
        # None 'data' is treated as "no text".
        combined_text = ' '.join(ocr_result.get('data') or [])

        if formula_pattern.search(combined_text):
            logger.info("发现公式")
            return True

        logger.info("OCR结果中不包含公式。")
        return False
    def perform_ocr(self,file_path):
        """
        Upload an image file to the OCR service and return its JSON reply.

        Args:
            file_path (str): path of the image file to upload.

        Returns:
            dict: the decoded JSON body when the service answers with HTTP 200;
            otherwise ``{"error": <description>}`` (non-200 status, or any
            exception raised while opening the file or performing the request).
        """
        # NOTE(review): OCR_URL is not among the config imports visible at the
        # top of this file - confirm it is defined elsewhere in the module.
        endpoint = OCR_URL
        form_fields = {
            "userid": "yxI_110",
            "client_id": "dcg-red-list"
        }
        # NOTE(review): the bearer token is hard-coded here; consider moving it to configuration.
        auth_headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}
        # Explicit None entries are passed as the proxies mapping for both schemes.
        no_proxy = {"http": None, "https": None}
        try:
            logger.info("开始进行OCR请求")
            with open(file_path, "rb") as image_fh:
                reply = requests.post(
                    endpoint,
                    files={"file": image_fh},
                    data=form_fields,
                    headers=auth_headers,
                    timeout=5,
                    proxies=no_proxy,
                )

            if reply.status_code != 200:
                logger.error(f"OCR请求失败，状态码: {reply.status_code}")
                return {"error": f"Request failed with status code {reply.status_code}"}

            logger.info("OCR请求成功")
            return reply.json()
        except Exception as e:
            logger.exception("OCR请求过程中出现异常")
            return {"error": str(e)}
    def perform_ocr_all(self,file_path):
        """
        Upload an image file to the OCR service with ``show_details`` enabled.

        Sends the same request as ``perform_ocr`` plus the form field
        ``show_details="True"``, and uses a 3-second timeout.

        Args:
            file_path (str): path of the image file to upload.

        Returns:
            dict: the decoded JSON body when the service answers with HTTP 200;
            otherwise ``{"error": <description>}`` (non-200 status, or any
            exception raised while opening the file or performing the request).
        """
        # NOTE(review): OCR_URL is not among the config imports visible at the
        # top of this file - confirm it is defined elsewhere in the module.
        endpoint = OCR_URL
        form_fields = {
            "userid": "yxI_110",
            "client_id": "dcg-red-list",
            "show_details":"True"
        }
        # NOTE(review): the bearer token is hard-coded here; consider moving it to configuration.
        auth_headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}
        # Explicit None entries are passed as the proxies mapping for both schemes.
        no_proxy = {"http": None, "https": None}
        try:
            logger.info("开始进行OCR请求")
            with open(file_path, "rb") as image_fh:
                reply = requests.post(
                    endpoint,
                    files={"file": image_fh},
                    data=form_fields,
                    headers=auth_headers,
                    timeout=3,
                    proxies=no_proxy,
                )

            if reply.status_code != 200:
                logger.error(f"OCR请求失败，状态码: {reply.status_code}")
                return {"error": f"Request failed with status code {reply.status_code}"}

            logger.info("OCR请求成功")
            return reply.json()
        except Exception as e:
            logger.exception("OCR请求过程中出现异常")
            return {"error": str(e)}
    def filter_correct_formulas(self,data):
        """
        Drop findings whose "corrected" formula is identical to the reported one.

        Args:
            data (list[dict]): entries holding three parallel lists under the
                keys 'error_formula', 'error_reason' and 'corrected_formula'.

        Returns:
            list[dict]: one dict per input entry that still has at least one
            finding, with the same three keys and only the findings where
            the error formula differs from the corrected formula. Entries left
            with no findings are omitted.
        """
        logger.info('in filter_correct_formulas ,data={}'.format(data))
        filtered_result = []
        for entry in data:
            new_entry = {
                'error_formula': [],
                'error_reason': [],
                'corrected_formula': []
            }
            # zip stops at the shortest list, so ragged lists no longer raise
            # IndexError; unpaired trailing items are ignored.
            findings = zip(entry['error_formula'], entry['error_reason'], entry['corrected_formula'])
            for wrong, reason, fixed in findings:
                if wrong != fixed:
                    new_entry['error_formula'].append(wrong)
                    new_entry['error_reason'].append(reason)
                    new_entry['corrected_formula'].append(fixed)

            # Only keep entries that still contain something to report.
            if new_entry['error_formula']:
                filtered_result.append(new_entry)

        return filtered_result
    # def filter_correct_formulas_elem(self,data):
    #     filtered_result = {
    #         'error_formula': [],
    #         'error_reason': [],
    #         'corrected_formula': []
    #     }
        
    #     error_formulas = data['error_formula']
    #     corrected_formulas = data['corrected_formula']
    #     error_reasons = data['error_reason']
    #     filter_list=['与标准方程的定义不符','公式未完整显示','公式顺序错误','计算错误','公式未完成','公式不完整','分母部分不正确','分子部分不准确','乘法的形式','坐标错误','公式缺少负号','速度可以为负值','总电阻的倒数等于各电阻倒数之和','分母应为']
                     
    #     for i in range(len(error_formulas)):
           
    #         # if "=" not in error_formulas[i]:
    #         #     continue
    #         # temp_error=error_formulas[i]
    #         # temp_corrected=corrected_formulas[i]
    #         if error_formulas[i] != corrected_formulas[i] and error_reasons[i] not in filter_list and '\frac{\Delta \Phi}{R}' not in error_formulas[i]:
    #             filtered_result['error_formula'].append(error_formulas[i])
    #             filtered_result['error_reason'].append(error_reasons[i])
    #             filtered_result['corrected_formula'].append(corrected_formulas[i])

    #     return filtered_result
    def check_numbers_in_string(self,text):
        """
        Report whether every number found in ``text`` is smaller than 5.

        Digits separated only by whitespace are read as a single number, so
        "1 2" counts as 12.

        :param text: string to scan
        :return: False if any number is >= 5; True otherwise (including when
                 the text contains no digits at all)
        """
        for raw_number in re.findall(r'\d(?:\s*\d)*', text):
            # \s matches tabs, newlines and other Unicode whitespace, not just
            # ' '. int() rejects whitespace between digits, so remove all of it
            # (stripping only ' ' raised ValueError on e.g. "1\t0").
            if int(re.sub(r'\s+', '', raw_number)) >= 5:
                return False
        return True

    def filter_correct_formulas_elem(self, data):
        """
        Filter one set of formula findings against hard-coded exclusion lists.

        A finding (error formula, reason, corrected formula) is kept only when
        all of the following hold, checked in this order:

        1. ``check_numbers_in_string`` accepts the corrected formula;
        2. the error formula is not empty;
        3. the reason contains none of the ``filter_list`` substrings;
        4. the error formula does not contain the form-feed-mangled
           ``frac{\\Delta \\Phi}{R}`` fragment (see the comment at its definition);
        5. the error formula contains none of the ``filter_latex`` substrings;
        6. the corrected formula contains none of the ``filter_correct_formula``
           substrings.

        The lists contain repeated entries; repeats do not affect the result.

        Args:
            data (dict): three parallel lists under the keys 'error_formula',
                'error_reason' and 'corrected_formula'.

        Returns:
            dict: same three keys, holding only the findings that passed.
        """
        # Findings whose reason contains any of these substrings are discarded.
        filter_list = [
            '无意义',
            '不等号的使用不当',
            '上下文中',
            '公式中不应有系数2',
            '不需要正负号',
            '未定义',
            '不规范的变量表示',
            '公式不清晰',
            '使用了小写字母b',
            '不规范的表达式',
            '导致公式不规范',
            '不符合标准的向量表示法',
            '拼写错误',
            '符号错误',
            '向量的表示',
            '错误的向量表示',
            '错误的符号和表达',
            '向量符号书写错误',
            '向量的表示应为',
            '单位书写错误',
            '数字书写错误',
            '变量顺序错误',
            '公式中使用了余弦而不是点积',
            '平均速度与平均速率',
            '单位描述不一致',
            '无实际意义',
            '取值不准确',
            '等式不正确',
            '计算结果',
            '缺少计算结果',
            '公式中缺少负号',
            '公式中缺少乘号',
            '公式与余弦定理不符',  # a derivation step of the law of cosines
            '三角形内角的正弦平方和不等于另一个角的正弦平方',
            '等号使用错误',
            '公式缺少一个比值',
            '不完整的三角形符号',
            '角度的正弦函数',
            '根号符号不正确',
            '符号表示错误',
            '缺少单位向量',
            '逗号',
            '值错误',
            '等号两边不相等',
            '函数值错误',
            '应用错误',
            '包含中文逗号',
            '样本空间定义不一致',
            '多余的变量e',
            '推导过程',  # new data
            '公式等号前后不相等',
            '公式与平行四边形的定义不符',
            '公式中缺少乘法符号',
            '缺少联合符号',
            '矢量表示错误，缺少方向单位向量',
            '方程组的解与原方程不匹配',
            # filters for problems seen in the current new data
            '等式两边不相等',
            '乘积而不是相乘',
            '公式中变量范围的表示方式不正确',
            '公式描述不准确',
            '不等式错误地变成了等式',
            '缺少等号',
            r"计算错误，30 \times 30 \times 20 不等于 1800",
            r"x^2 = 18000，\sqrt{18000} 不等于 \sqrt{1800}",
            r"x \geq 0 与 x = 30 \sqrt{2} 不一致",  # absurd finding, junior-high math
            '不等式的解法错误',
            '公式中条件部分应与公式分开',
            '公式描述与法拉第电磁感应定律不符，缺少负号。',
            '电阻分配和电流成反比',
            '不等式的条件错误',
            '不等式错误地转化为等式',
            '公式中平方根的表达式应为绝对值',
            '不等式条件下的等式错误',
            '公式前后不一致',
            '公式缺少描述',
            '化简绝对值时符号错误',
            '不符合前面的计算结果',
            '化简绝对值时符号错误',
            '公式中平方根的简化不正确',  # junior-high math
            '展开错误',
            '公式不完整',
            '矢量加法和减法错误',
            '公式不完整',
            '公式中重复了等号',
            '公式中的逻辑错误',
            # errors from manuscripts of unknown origin
            '位移符号错误',  # a full stop in applied-physics text was taken for a subscript
            '不够精确',
            '角速度计算错误导致线速度计算错误',
            '使用了近似值3.14',
            '单位换算错误',
            '化简绝对值时符号错误',
            '公式中平方根的展开不正确',
            '不正确的解集',
            '平方根计算错误',
            '绝对值函数展开错误',
            '公式中未进行化简',
            '分解错误',
            '化简错误',
            '公式不完整',
            '公式重复',
            '错误的化简步骤',
            '不符合平方根的正负值',
            '平方根计算错误',
            '不等式条件下的等式不成立',
            '平方根的定义不正确',
            '公式中平方根的表达式应为绝对值',
            '展开错误',
            '进行化简',
            '化简绝对值时符号错误',
            '因式分解错误',
            '公式推导错误',
            '不是一个完全平方数的分解形式',
            '不是一个正整数',
            '公式重复',
            '公式推导错误',
            '公式未完整显示',
            '公式顺序错误',
            '计算错误',
            '公式未完成',
            '公式不完整',
            '分母部分不正确',
            '分子部分不准确',
            '乘法的形式',
            '坐标错误',
            '公式缺少负号',
            '速度可以为负值',
            '总电阻的倒数等于各电阻倒数之和',
            '分母应为',
            '灯',
            '电流符号',
            '重复',
        ]

        # Findings whose error formula contains any of these substrings are
        # discarded. Several formulas appear both with single and with doubled
        # backslashes; both spellings are kept as they were.
        filter_latex = [
            '3x \\geq 0 \\text{且}-x \\geq 0',
            "\\frac{\\sqrt{2Rh_{1}}}{\\sqrt{2Rh_{2}}}",
            "r = \\sqrt{2Rh}",
            "\\sqrt { \\frac { x - 5 } { 7 - x } } = \\frac { \\sqrt { x - 5 } } { \\sqrt { 7 - x } }",
            "\\frac { \\sqrt { x + 1 } } { \\sqrt { x - 1 } } = \\sqrt { \\frac { x + 1 } { x - 1 } }",
            'x = \\pm \\sqrt{1800}',
            '\\frac { \\sqrt { 2 R h _ { 1 } } } { \\sqrt { 2 R h _ { 2 } } }',
            'r = \\sqrt { 2 R h }',
            '2-a≥0且a+1≠0',
            '3x≥0且-x≥0',
            '\\sqrt { 3 x } + \\sqrt { - x }',
            'b + \\frac { 1 } { 2 } =0',
            'a-2=0',
            'R _ { 2 } = \\rho \\frac { 2 l } { S _ { 2 } }',
            'x = \\pm \\sqrt{1800}.',
            '\\sqrt{(-36) \\times 16 \\times (-9)}',
            r'm g = q v B + q \\frac { E } { d }',
            r'Q = \\overline { I } \\cdot \\Delta t = \\frac { \\overline { E } } { R } \\cdot \\Delta t = \\frac { \\Delta \\Phi } { R }',
            r'P _ { 热 } = P R',
            r"a = \frac { v ^ { 2 } } { r }",
            r"\sqrt { \\frac { x - 5 } { 7 - x } } = \\frac { \\sqrt { x - 5 } } { \\sqrt { 7 - x } }",
            # A comma was missing after the next literal, which silently glued
            # it to the following one so that neither could ever match.
            r"P_{出max}=\frac{E^2}{4r}",
            "P_{出max}=\\frac{E^2}{4r}",
            r"\eta = \frac{U}{E} = \frac{R}{R+r}",
            "\\eta = \\frac{U}{E} = \\frac{R}{R+r}",
            r"P_{出max}=\\frac{E^2}{4r}",
            r"\\sqrt{(a)^2}=a(a\\geq0)",
            r"\sqrt{(a)^2}=a(a\geq0)",
            '\\omega = 2\\pi n',
            r'\omega = 2\pi n',
            # Was a non-raw literal with r"..." pasted inside the quotes, so it
            # contained form feeds and quote characters and could never match.
            r"\eta = \frac{U}{E} = \frac{R}{R + r}",
            r"\cos \angle OF_1B_1 = e",
            r"\omega = 2 \pi n",
            r"\Delta \theta = \frac{\omega^2 - \omega_0^2}{2\beta}",
            r"\sqrt{(y_1 + y_2)^2 - 4y_1 y_2}",
            r"|AB| = \sqrt{1 + \frac{1}{k^2}} |y_1 - y_2|",
            r"\left( \frac{p}{2}, 0 \right)",
            r"x = -\frac{p}{2}",
            r"\left( -\frac{p}{2}, 0 \right)",
            r"x = \frac{p}{2}",
            # next round of filters
            r"g = \frac{2h}{t^2}",
            r"E=\frac{\Delta \Phi}{\Delta t}",
            r"E = -\frac{\Delta \Phi}{\Delta t}",
            r"Q=I \cdot \Delta t=\frac{E}{R} \cdot \Delta t=\frac{\Delta \Phi}{R}",
            r"E=\frac{\Delta \Phi}{\Delta t}",
            # findings added after a later tuning pass
            r"P(A) = P(AB \\cup AB) = P(AB) + P(AB)",
            r"P(AB) = P(A) - P(A)P(B)",
            r"P(ABC) = P(A)P(B)P(C)",
            r"P(A) = P(AB \cup AB) = P(AB) + P(AB)",
            r"P(AB) = P(A)P(B)",
            r"P(A) = P(AB \cup AB) = P(AB) + P(AB)",
            r"|AB| = \\sqrt{1 + \\frac{1}{k^2}} |y_1 - y_2|",
            r"c^2 = b^2 + a^2",
            # new data
            '|B_1F_1| = a',
            '\\sqrt{\\frac{a}{b}} = \\frac{\\sqrt{a}}{\\sqrt{b}} (a \\ge 0, b > 0)',
            r"\sqrt{\frac{a}{b}} = \frac{\sqrt{a}}{\sqrt{b}} (a \ge 0, b > 0)",
            r'\sqrt{a} \cdot \sqrt{b} = \sqrt{ab} (a \geq 0, b \geq 0)',
            r'\sqrt{ab} = \sqrt{a} \cdot \sqrt{b} (a \geq 0, b \geq 0)',
            '\\sqrt{a} \\cdot \\sqrt{b} = \\sqrt{ab} (a \\geq 0, b \\geq 0)',
            '\\sqrt{ab} = \\sqrt{a} \\cdot \\sqrt{b} (a \\geq 0, b \\geq 0)',
            r"y = \frac{p}{2}",
            r"y = \\frac{p}{2}",
            # A comma was missing after the next literal as well.
            'k_{AB} = -\\frac{b^2 x_0}{a^2 y_0}',
            r'\sqrt{(x_1 + x_2)^2 - 4x_1 x_2}',
            '\\sqrt{(x_1 + x_2)^2 - 4x_1 x_2}',
            r"$|B_1F_1\right| = a$",
            r"|B_1F_1\right| = a",
            r"\left|B_1F_1\right| = a",
            r"\overrightarrow{CA} = \overrightarrow{OA} - \overrightarrow{OC}",
            r"\sqrt{\frac{a}{b}} = \frac{\sqrt{a}}{\sqrt{b}} (a \geq 0, b > 0)",
            # three absurd junior-high math findings
            r"30 \times 30 \times 20 = x^2 \times 10，得 x^2 = 1800，",
            r"\therefore x = \pm \sqrt{1800}。",
            r"\therefore x \geq 0，x = 30 \sqrt{2}。",
            r"\sqrt{a^2} = a (a \geq 0)",
            '\\sqrt{a-2} \\geq 0, \\sqrt{b+\\frac{1}{2}} \\geq 0',  # junior-high math
            '\\sqrt{a-2} \\geq 0, \\sqrt{b+\\frac{1}{2}} \\geq 0',  # junior-high math
            r"n = \frac{\omega}{2\pi}",  # auto-completion
            r"\Delta \theta = \frac{\omega^2 - \omega_0^2}{2\beta}",
            r"H_0 = v_0 \sin \theta t - \frac{1}{2}gt^2",
            r"H_0 = v_0 \sin \theta \frac{v_0 \sin \theta}{g} - \frac{1}{2}g \left( \frac{v_0 \sin \theta}{g} \right)^2",
            r'\frac{E^2}{(R-r)^2+4r}',
            '\\frac{E^2}{(R-r)^2+4r}',
            r'P=IU',
            r"\frac{U_1}{U_2} = \frac{R_1}{R_2}",
            r"\dfrac{(n_1+n_2)e}{t}",
            r"\{M ||MF| = d\}",
            r"\left|AB\right| = \sqrt{1 + \left( \frac{1}{k} \right)^2} \left|y_1 - y_2\right| = \sqrt{1 + \left( \frac{1}{k} \right)^2} \sqrt{(y_1 + y_2)^2 - 4y_1 y_2}",
            r"E=n\frac{\Delta \Phi}{\Delta t}=L\frac{\Delta I}{\Delta t}",
            r"\\frac{E^2R}{(R+r)^2}",
            r"\\frac{E^2}{(R-r)^2+4r}",
            r"\frac{E^2R}{(R+r)^2}",
            r"\frac{E^2}{(R-r)^2+4r}",
            r"\cos \angle OF_1B_1 = e",
            r"e = \frac{c}{a}",
            r"c^2=b^2+a^2",
            r"\frac{x^2}{a^2}+\frac{y^2}{b^2}=1",
            r"\overline{v}——物体在\Delta t时间内的平均速度",
            r"\sqrt{(x-2)^2} - \sqrt{(1-2x)^2} = (x-2) + (1-2x) = -x-1",
            r"$$|x-2|+\sqrt{(x+3)^2}+\sqrt{x^2-10x+25}$$",
            r"\frac{\sqrt{2Rh_1}}{\sqrt{2Rh_2}}",
            r"$t=\sqrt{\frac{h}{5}}$",
            r"$h = 5t^2$",
            r"h=5t^2",
            r"$|x-2|+\sqrt{(x+3)^2}+\sqrt{x^2-10x+25}$",
            r"$$|x-2|+\sqrt{(x+3)^2}+\sqrt{x^2-10x+25}$$",
            r"\frac{\sqrt{2Rh_1}}{\sqrt{2Rh_2}}",
            r"\overrightarrow{OM_1}",
            r'\Delta v_{\text{飞}} = 300 - 0 = 300(km/s) = 83(m/s)',
            '\\Delta v_{\\text{飞}} = 300 - 0 = 300(km/s) = 83(m/s)',
            r"\Delta v_{\text{飞}} = 300 - 0 = 300(km/s) = 83(m/s)",
            r"|AB| = \sqrt{1 + k^2} |x_1 - x_2| = \sqrt{1 + k^2} \sqrt{(x_1 + x_2)^2 - 4x_1x_2}",
            r"\left|AB\right| = \sqrt{1 + \left(\frac{1}{k}\right)^2} \left|y_1 - y_2\right| = \sqrt{1 + \left(\frac{1}{k}\right)^2} \sqrt{(y_1 + y_2)^2 - 4y_1y_2}",
            r"h = 5t^2",
            r"\frac{\Delta \Phi}{R}",
            r"\omega = 2 \pi n",
            r"\varphi = \alpha = 2\theta = \omega t",
            r"Q=I \cdot \Delta t=\frac{\overline{E}}{R} \cdot \Delta t=\frac{\Delta \Phi}{R}",
            r"n \frac{\Delta \Phi}{\Delta t}",
            r"\eta=\frac{U}{E}=\frac{R}{R+r}",
        ]

        # Findings whose corrected formula contains any of these substrings are discarded.
        filter_correct_formula = [
            r"B=\\frac{F}{I \\cdot L}",
            r"F=Bqv \sin \theta",
            r"\eta = \frac{U}{E} = \frac{R}{R+r}",
            r"\\eta = \\frac{U}{E} = \\frac{R}{R+r}",
            r"I = \\frac{Q}{t}",
            r"|AB| = \\sqrt{1 + k^2} |y_1 - y_2|",
            '|B_1F_1| = b',
            r'E=-\frac{\Delta \Phi}{\Delta t}',
            'E=-\\frac{\\Delta \\Phi}{\\Delta t}',
            r"y^2 = -2px \ (p < 0)",
            r"\left( \frac{p}{2}, 0 \right)",
            r"x = -\frac{p}{2}",
            r"\left( 0, -\frac{p}{2} \right)",
            r"y = -\frac{p}{2}",
            r"y^2 = -2px \ (p < 0)",
            r"\left( \frac{p}{2}, 0 \right)",
            r"x = -\frac{p}{2}",
            r"x^2 = -2py \ (p < 0)",
            r"\left( 0, \frac{p}{2} \right)",
            r"y = -\frac{p}{2}",
            r"y^2 = 2px \ (p > 0)",
            r"\left( \frac{p}{2}, 0 \right)",
            r"x = -\frac{p}{2}",
            r"x^2 = 2py \ (p > 0)",
            r"\left( 0, \frac{p}{2} \right)",
            r"y = -\frac{p}{2}",
            'k_{AB} = -\\frac{b^2}{a^2} \\cdot \\frac{x_0}{y_0}',
        ]

        # The original check used the non-raw literal '\frac{\Delta \Phi}{R}', in
        # which "\f" is a form feed. The same runtime value is spelled out here
        # with explicit escapes, so behaviour is unchanged and the invalid
        # "\D" / "\P" escapes are gone. Presumably this catches formulas whose
        # "\frac" lost its backslash during decoding; the intact spelling is
        # covered by filter_latex.
        mangled_frac = '\x0crac{\\Delta \\Phi}{R}'

        filtered_result = {
            'error_formula': [],
            'error_reason': [],
            'corrected_formula': []
        }

        # zip stops at the shortest list, so ragged lists no longer raise
        # IndexError; unpaired trailing items are ignored.
        findings = zip(data['error_formula'], data['error_reason'], data['corrected_formula'])
        for wrong, reason, fixed in findings:
            if not self.check_numbers_in_string(fixed):
                continue
            if len(wrong) == 0:
                continue
            if any(item in reason for item in filter_list):
                continue
            if mangled_frac in wrong:
                continue
            if any(item in wrong for item in filter_latex):
                continue
            if any(item in fixed for item in filter_correct_formula):
                continue
            filtered_result['error_formula'].append(wrong)
            filtered_result['error_reason'].append(reason)
            filtered_result['corrected_formula'].append(fixed)

        return filtered_result
    

    def filter_text_answer(self,text):
        """
        Remove markdown sections whose heading is an answer/solution keyword.

        A section is a heading ("#" to "######" followed by a space) together
        with everything up to the next such heading or the end of the text.
        The heading's title is its first line with leading '#', digits, '、',
        '.' and spaces removed; a section is dropped when that title equals one
        of the keywords exactly.

        Text that precedes a dropped section is kept when it contains anything
        other than whitespace, so a preamble before a filtered first heading is
        no longer lost. A whitespace-only separator in front of a dropped
        section is still removed together with it.

        Args:
            text (str): markdown text.

        Returns:
            str: the text without the filtered sections.
        """
        # Headings that mark a section to be removed.
        keywords = {'证明','证明:','证明：','解','解：','解:','例题', '解析：','名师经验谈：','答案','解析','名师经验谈','答案：','解析:','名师经验谈:','答案:','各师经验谈：'}
        section_pattern = re.compile(r'(#{1,6} .+?)(?=\n#{1,6} |\Z)', re.S | re.M)
        title_prefix = re.compile(r'^[#\d、. ]+')

        kept_parts = []
        last_end = 0
        for section_match in section_pattern.finditer(text):
            section = section_match.group(1)
            gap = text[last_end:section_match.start()]
            last_end = section_match.end()

            # Only the heading line decides whether the section is filtered.
            title = title_prefix.sub('', section.split('\n')[0]).strip()
            if title in keywords:
                # Drop the section, but never drop real content in front of it.
                if gap.strip():
                    kept_parts.append(gap)
                continue

            kept_parts.append(gap)
            kept_parts.append(section)

        # Whatever follows the last section.
        kept_parts.append(text[last_end:])
        return ''.join(kept_parts)
    
    def formula_checker(self,image_path,prompt):
        """
        Send an image plus a text prompt to the chat-completions endpoint.

        The image is read from disk, base64-encoded and embedded in the request
        as a ``data:image/jpeg`` URL. The request goes to ``Official_OPENAI_URL``
        with model "gpt-4o", max_tokens 1000, temperature 0.0 and a 30-second
        timeout.

        Args:
            image_path (str): path of the image file to send.
            prompt (str): text instruction sent alongside the image.

        Returns:
            tuple[str, str] | str: ``(model_reply, base64_image)`` when the
            endpoint answers with HTTP 200 and at least one choice; the empty
            string ``''`` otherwise.
            NOTE(review): the success and failure shapes differ, so a caller
            that unpacks two values must check for ``''`` first.

        Raises:
            EnvironmentError: if ``image_path`` is None or empty.
            Exceptions raised by ``open`` or ``requests.post`` (for example a
            timeout) are not caught here.
        """
        if image_path is None or len(image_path)<=0:
            raise EnvironmentError("image_path Errors!")

        with open(image_path, "rb") as image_file:
            base64_image = base64.b64encode(image_file.read()).decode('utf-8')
        logger.info('api-key和endPoints 没有做封装，生产环境注意！')

        api_key = Official_API_KEY
        end_point = Official_OPENAI_URL
        timeout = 30

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }
        message = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ]
        payload = {
            "model": "gpt-4o",
            "messages": message,
            "max_tokens": 1000,
            "temperature": 0.0
        }

        response = requests.post(url=end_point, headers=headers, json=payload, timeout=timeout)

        # An error response is not guaranteed to carry a JSON body; decoding it
        # unconditionally used to raise before the status code was looked at.
        try:
            response_json = response.json()
        except ValueError:
            logger.error('response is not valid JSON, status_code={}, body={}'.format(
                response.status_code, response.text[:200]))
            return ''
        logger.info('response_json={}'.format(response_json))

        if response.status_code != 200:
            logger.error("请求服务器错误")
            return ''

        if 'choices' in response_json and len(response_json['choices']) > 0:
            model_reply = response_json['choices'][0]['message']['content']
            return model_reply,base64_image

        logger.info('model_reply is Empty!')
        return ''
        
    def write_token_logs(self,params):
        """
        Record one LLM call through ``DBUtils.insert_llm_log``.

        Both timestamps are taken inside this call (before and after reading
        ``params``), so they do not measure the model request itself.

        Args:
            params (dict): must provide the keys 'status', 'url', 'api-key',
                'input_token', 'output_token', 'model' and 'text'. Only the
                first 50 characters of 'text' are stored.

        Returns:
            int: always 1.
        """
        time_format = "%Y-%m-%d %H:%M:%S"
        started_at = time.strftime(time_format, time.localtime())

        status = params['status']
        endpoint = params['url']
        key = params['api-key']
        tokens_in = params['input_token']
        tokens_out = params['output_token']
        model_name = params['model']

        finished_at = time.strftime(time_format, time.localtime())
        request_preview = params['text'][:50]

        DBUtils.insert_llm_log(
            env=ENV,
            source_type="TEXT",
            server_name="formula_corrector",
            request=request_preview,
            model=model_name,
            url=endpoint,
            api_key=key,
            input_token=tokens_in,
            output_token=tokens_out,
            start_time=started_at,
            end_time=finished_at,
            message=status,
        )
        return 1
    #{'error': {'message': 'You exceeded your current requests list.', 'type': 'limit_requests', 'param': None, 'code': 'limit_requests'}, 'request_id': '823d3e60-9d84-99b0-9a48-3d07875ff214'}
    def qwen_official_infer(self,system_prompt,user_prompt):
        """
        Ask the Qwen chat endpoint for a completion, with a fallback model.

        'qwen-max-latest' is tried first and 'qwen-max' second. A model is
        skipped when the call raises or when its response contains an 'error'
        key; each model is tried independently, so a failure of the first one
        no longer prevents the fallback from running. On success the token
        usage is written through ``DBUtils.insert_llm_log`` with the model that
        actually answered.

        Args:
            system_prompt (str): content of the system message.
            user_prompt (str): content of the user message; its first 10
                characters are stored in the token log.

        Returns:
            str: the content of the first choice, or ``''`` when no model
            produced a usable response. A failure while writing the token log
            is logged and does not discard the answer.
        """
        candidate_models = ['qwen-max-latest','qwen-max']

        try:
            client = OpenAI(
                api_key=QWEN_API_KEY,
                base_url=QWEN_URL,
            )
        except Exception as e:
            logger.error(f'qwen_official_infer failed, error: {str(e)}')
            return ''

        start_time = time.time()
        start_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(start_time))

        res_json = None
        used_model = None
        for model_name in candidate_models:
            try:
                completion = client.chat.completions.create(
                    model=model_name,
                    messages=[
                        {'role': 'system', 'content': system_prompt},
                        {'role': 'user', 'content': user_prompt}
                    ],
                    temperature=0.1,
                    seed=42,
                    top_p=0.8
                )
                candidate = json.loads(completion.model_dump_json())
            except Exception as e:
                # The client raises on failed requests (presumably including
                # rate limits - verify against the SDK); try the next model
                # instead of giving up.
                logger.error(f'qwen_official_infer failed, model: {model_name}, error: {str(e)}')
                continue

            res_json = candidate
            used_model = model_name
            # Stop at the first response that does not report an error.
            if 'error' not in res_json:
                logger.info(f'qwen-max infer successfully, response: {res_json}, time: {time.time() - start_time}s')
                break

        if res_json is None:
            return ''

        try:
            if 'choices' in res_json and len(res_json['choices']) > 0:
                content = res_json['choices'][0]['message']['content']
                input_token = res_json['usage']['prompt_tokens']
                output_token = res_json['usage']['completion_tokens']
            else:
                logger.error("No valid choices found in response.")
                return ''
        except Exception as e:
            logger.error(f'qwen_official_infer error, error: {str(e)}, response: {res_json}')
            return ''

        # Token accounting is best-effort: a failure here must not throw away
        # an answer that was already obtained.
        try:
            end_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
            # NOTE(review): env is hard-coded to 'PRO' here while
            # write_token_logs uses the configured ENV - confirm this is intended.
            DBUtils.insert_llm_log(
                env='PRO',
                source_type="TEXT",
                server_name="qwen_official_infer",
                request=user_prompt[:10],
                model=used_model,
                url="formula_correction",
                api_key="formula_correction",
                input_token=input_token,
                output_token=output_token,
                start_time=start_time_str,
                end_time=end_time_str,
                message='OK'
            )
        except Exception as e:
            logger.error(f'qwen_official_infer token log failed, error: {str(e)}')

        return content

    def formula_checker_mini(self,image_path,prompt,text,bool_img=True):
        """
        Send a prompt, optionally with an image, to the chat-completions endpoint.

        The request goes to ``Official_OPENAI_URL`` with model "gpt-4o",
        max_tokens 4096, temperature 0.1 and a 180-second timeout. On a
        successful reply the token usage is recorded through
        ``write_token_logs``.

        Args:
            image_path (str): path of the image file. It is validated even when
                ``bool_img`` is False, but only read when ``bool_img`` is True.
            prompt (str): text instruction sent to the model.
            text (str): text stored with the token log (not sent to the model).
            bool_img (bool): when True the image is base64-encoded and attached
                to the message; when False only the prompt is sent.

        Returns:
            str: the content of the first choice, or ``''`` when the response
            is not valid JSON, the status is not 200, or there are no choices.

        Raises:
            EnvironmentError: if ``image_path`` is None or empty.
            Exceptions raised by ``open`` or ``requests.post`` (for example a
            timeout) are not caught here.
        """
        if image_path is None or len(image_path)<=0:
            raise EnvironmentError("image_path Errors!")

        logger.info('api-key和endPoints 没有做封装，生产环境注意！')

        api_key = Official_API_KEY
        end_point = Official_OPENAI_URL
        timeout = 180

        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}"
        }

        # The message always carries the prompt; the image part is optional.
        content_parts = [
            {
                "type": "text",
                "text": prompt
            }
        ]
        if bool_img:
            with open(image_path, "rb") as image_file:
                base64_image = base64.b64encode(image_file.read()).decode('utf-8')
            content_parts.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{base64_image}",
                    "detail": "auto"
                }
            })
        message = [
            {
                "role": "user",
                "content": content_parts
            }
        ]

        payload = {
            "model": "gpt-4o",
            "messages": message,
            "max_tokens": 4096,
            "temperature": 0.1
        }
        proxies = {"http": None, "https": None}
        response = requests.post(url=end_point, headers=headers, json=payload, timeout=timeout, proxies=proxies)
        logger.info(f'gpt4 response={response}')

        # An error response is not guaranteed to carry a JSON body.
        try:
            response_json = response.json()
        except ValueError:
            logger.error('response is not valid JSON, status_code={}, body={}'.format(
                response.status_code, response.text[:200]))
            return ''
        logger.info('response_json={}'.format(response_json))

        # Check the status before touching 'usage' / 'model': reading them
        # first raised KeyError on bodies that lack those keys, instead of
        # returning ''.
        if response.status_code != 200:
            logger.error("请求服务器错误")
            return ''

        if not ('choices' in response_json and len(response_json['choices']) > 0):
            logger.info('model_reply is Empty!')
            return ''

        model_reply = response_json['choices'][0]['message']['content']
        logger.info('model return success!')

        usage = response_json.get('usage') or {}
        return_params = {
            'input_token': usage.get('prompt_tokens', 0),
            'output_token': usage.get('completion_tokens', 0),
            'status': 'OK',
            'api-key': api_key,
            'url': end_point,
            'model': response_json.get('model', payload['model']),
            'text': text,
        }
        # Token accounting is best-effort: a logging failure must not throw
        # away a reply that was already obtained.
        try:
            self.write_token_logs(return_params)
        except Exception:
            logger.exception('write_token_logs failed')
        return model_reply

    # def layout_Rec(self,image_path):
    #     max_try=3
    #     flag_num=1
    #     while flag_num<max_try:

    #         if image_path is None or len(image_path)<=0:
    #             logger.error('路径错误！image_path={}'.format(image_path))
    #             return []
            
    #         #url = "http://192.168.1.235:30016/v1/dcg_layout"
    #         url = LAYOUT_CHECK_URL
    #         file = open(image_path, "rb")
    #         #logger.info('file={}'.format(file))
    #         #logger.info('image_path={}'.format(image_path))
    #         params = {
    #             "userid": "yxl_110",
    #             "client_id": "dcg-red-list"
    #         }
    #         headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}
    #         try:
    #             #proxies = {"http": None, "https": None}
    #             response = requests.post(url, files={"file": file}, data=params, headers=headers)
                
    #             if response.status_code == 200:
    #                 response=response.json()
        
    #                 return response['data']['Ids_Scores_boxes']
    #             else:
    #                 flag_num+=1
    #                 time.sleep(1)
    #         except Exception as e:
    #             logger.error(f"版面检测调用失败！{e},失败次数={flag_num}")
                
    #     logger.error(f"版面检测完全调用失败！")
    #     return []        
    # def layout_Rec(self,image_path):
        
    #     if image_path is None or len(image_path)<=0:
    #         logger.error('路径错误！image_path={}'.format(image_path))
    #         return []
        
    #     #url = "http://192.168.1.235:30016/v1/dcg_layout"
    #     url = LAYOUT_CHECK_URL
    #     logger.info('in layout_Rec! alrt:api key not save in datasets!')
    #     file = open(image_path, "rb")
    #     #logger.info('file={}'.format(file))
    #     #logger.info('image_path={}'.format(image_path))
    #     params = {
    #         "userid": "dcg-kb",
    #         "client_id": "dcg-red-list"
    #     }
    #     headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}
    #     try:
    #         response = requests.post(url, files={"file": file}, data=params, headers=headers)
    #         if response.status_code == 200:
    #             response=response.json()
    #             # 获取内部 JSON 字符串的值
    #             inner_data = json.loads(response['data'])
    #             Ids_Scores_boxes = json.loads(inner_data['Ids_Scores_boxes'])
    #             return Ids_Scores_boxes
    #         else:
    #             logger.error('layout 检测失败， image_path={}'.format(image_path))
    #             return []
    #     except Exception as e:
    #         logger.error('Layout Detection Error! image_path={},e={}'.format(image_path,e))
    #         return []
    def layout_Rec(self, image_path, timeout=None):
        """Upload an image to the layout-detection service and return its boxes.

        The file is posted as multipart form data to ``LAYOUT_CHECK_URL`` and
        the ``data -> Ids_Scores_boxes`` entry of the JSON reply is returned.

        Args:
            image_path: Path of the image file to upload.
            timeout: Optional timeout (seconds) forwarded to ``requests.post``.
                The default ``None`` keeps the previous behaviour of waiting
                without a limit.

        Returns:
            The ``Ids_Scores_boxes`` value from the reply, or ``[]`` on any
            failure: empty path, unreadable file, non-200 status, unexpected
            payload, or a request error.
        """
        if image_path is None or len(image_path) <= 0:
            logger.error('路径错误！image_path={}'.format(image_path))
            return []

        url = LAYOUT_CHECK_URL
        logger.info('in layout_Rec! alrt:api key not save in datasets!')

        params = {
            "userid": "dcg-kb",
            "client_id": "dcg-red-list"
        }
        # NOTE(review): the bearer token is hard-coded here; consider moving it to config.
        headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}

        try:
            # The file is opened inside the try block so that a missing or
            # unreadable file yields [] like every other failure of this method.
            with open(image_path, "rb") as file:
                response = requests.post(
                    url,
                    files={"file": file},
                    data=params,
                    headers=headers,
                    timeout=timeout,
                )

            if response.status_code != 200:
                logger.error(f'layout 检测失败， image_path={image_path}, status_code={response.status_code}')
                return []

            response_data = response.json()
            logger.info(f'LayOut_response_data={response_data}')
            if 'data' not in response_data:
                logger.error('Invalid format for data in response.')
                return []

            # A missing 'Ids_Scores_boxes' key is handled by the except below.
            return response_data['data']['Ids_Scores_boxes']
        except Exception as e:
            logger.error(f'Layout Detection Error! image_path={image_path}, e={str(e)}')
            return []
    
    def layout_Rec_19(self, image_path, timeout=None):
        """Upload an image to the ``LAYOUT_CHECK_URL_19`` service and return its boxes.

        The reply is expected to carry ``data -> boxes_num`` and
        ``data -> boxes``. Each returned box is wrapped as ``[1, 0.9, box]``
        (presumably to mimic an ``[id, score, box]`` layout used elsewhere;
        the constants 1 and 0.9 are fixed placeholders in this code).

        Args:
            image_path: Path of the image file to upload.
            timeout: Optional timeout (seconds) forwarded to ``requests.post``.
                The default ``None`` keeps the previous behaviour of waiting
                without a limit.

        Returns:
            A list of ``[1, 0.9, box]`` entries, or ``[]`` when no boxes were
            detected or on any failure (empty path, unreadable file, non-200
            status, unexpected payload, request error).
        """
        if image_path is None or len(image_path) <= 0:
            logger.error('路径错误！image_path={}'.format(image_path))
            return []

        url = LAYOUT_CHECK_URL_19
        logger.info('in layout_Rec! alrt:api key not save in datasets!')

        params = {
            "userid": "dcg-kb",
            "client_id": "dcg-red-list"
        }
        # NOTE(review): the bearer token is hard-coded here; consider moving it to config.
        headers = {"Authorization": "Bearer dcg-MTQ2MDRkYWRmNzRjMDg0ZjZmNTc3YTliMWM0YzYwYmVlZDE="}

        try:
            # The file is opened inside the try block so that a missing or
            # unreadable file yields [] like every other failure of this method.
            with open(image_path, "rb") as file:
                response = requests.post(
                    url,
                    files={"file": file},
                    data=params,
                    headers=headers,
                    timeout=timeout,
                )

            if response.status_code != 200:
                logger.error(f'layout 检测失败， image_path={image_path}, status_code={response.status_code}')
                return []

            response_data = response.json()["data"]
            logger.info(f'LayOut_response_data={response_data}')
            if response_data["boxes_num"] > 0:
                # Re-pack every raw box into the three-item entry format.
                return [[1, 0.9, box] for box in response_data['boxes']]

            logger.error('no detection boxes by layout server!')
            return []
        except Exception as e:
            logger.error(f'Layout Detection Error! image_path={image_path}, e={str(e)}')
            return []
    
    def get_sub_img_paths(self, image_path, formula_positions, output_folder):
        """Crop every formula box out of an image and save the crops as PNG files.

        Each box is padded by 10 pixels on every side (clamped to the image
        bounds) and written to ``<output_folder>/<timestamp>/formula_<k>.png``,
        where ``<timestamp>`` comes from ``self.get_day_time()`` and ``k`` is
        the 1-based index of the box.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that is
            malformed, empty after clamping, or cannot be saved is logged and
            skipped. ``[]`` is returned when the image cannot be loaded or the
            output directory cannot be created.
        """
        try:
            logger.info('in get_sub_img_paths')
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 10 pixels per side without leaving the image.
                    x1 = max(0, x1 - 10)
                    y1 = max(0, y1 - 10)
                    x2 = min(img_w, x2 + 10)
                    y2 = min(img_h, y2 + 10)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                    # cv2.imwrite reports failure through its return value; only
                    # report paths of files that were really written.
                    if not cv2.imwrite(output_path, roi):
                        raise IOError(f"Failed to write image file '{output_path}'.")

                    extracted_image_paths.append(output_path)
                except Exception as e:
                    # Logged (not printed) to match the other crop helpers of this class.
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []

    def enlarge_image(self, image, scale_factor=2):
        """Return ``image`` resized by ``scale_factor`` using Lanczos interpolation.

        Args:
            image: Image array; ``shape[0]`` is used as height and ``shape[1]``
                as width.
            scale_factor: Multiplier applied to both dimensions (default 2).

        Returns:
            The resized image produced by ``cv2.resize``.
        """
        src_height, src_width = image.shape[0], image.shape[1]
        # Target size is truncated to whole pixels; cv2.resize takes (width, height).
        target_size = (int(src_width * scale_factor), int(src_height * scale_factor))
        return cv2.resize(image, target_size, interpolation=cv2.INTER_LANCZOS4)

    def get_sub_img_paths_enhanced(self, image_path, formula_positions, output_folder):
        """Crop every formula box, enlarge the crop and save it as a PNG file.

        Each box is padded by 10 pixels on every side (clamped to the image
        bounds), enlarged with ``self.enlarge_image`` and written to
        ``<output_folder>/<timestamp>/formula_<k>.png``, where ``<timestamp>``
        comes from ``self.get_day_time()`` and ``k`` is the 1-based box index.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that is
            malformed, empty after clamping, or cannot be saved is logged and
            skipped. ``[]`` is returned when the image cannot be loaded or the
            output directory cannot be created.
        """
        try:
            logger.info('in get_sub_img_paths')
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 10 pixels per side without leaving the image.
                    x1 = max(0, x1 - 10)
                    y1 = max(0, y1 - 10)
                    x2 = min(img_w, x2 + 10)
                    y2 = min(img_h, y2 + 10)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    # Upscale the crop before saving it.
                    enlarged_roi = self.enlarge_image(roi)
                    logger.info('增强图像')

                    output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                    # cv2.imwrite reports failure through its return value; only
                    # report paths of files that were really written.
                    if not cv2.imwrite(output_path, enlarged_roi):
                        raise IOError(f"Failed to write image file '{output_path}'.")

                    extracted_image_paths.append(output_path)
                except Exception as e:
                    # Logged (not printed) to match the other crop helpers of this class.
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []

    def get_sub_img_paths_enhanced_v2(self, image_path, formula_positions, output_folder):
        """Crop, enlarge and save formula boxes, halving very large boxes.

        Each box is padded by 15 pixels on every side (clamped to the image
        bounds). A padded box whose area is at least half of the whole image
        and whose width or height exceeds 600 pixels is cut into an upper and
        a lower half, saved as ``formula_<k>_1.png`` and ``formula_<k>_2.png``.
        Any other box is saved as ``formula_<k>.png``. Every saved crop is
        enlarged with ``self.enlarge_image``. Files go to
        ``<output_folder>/<timestamp>/`` with the timestamp taken from
        ``self.get_day_time()``; ``k`` is the 1-based box index.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written, in processing
            order. A box that fails is logged and skipped (halves written
            before the failure stay in the list). ``[]`` is returned when the
            image cannot be loaded or the output directory cannot be created.
        """
        try:
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 15 pixels per side without leaving the image.
                    x1 = max(0, x1 - 15)
                    y1 = max(0, y1 - 15)
                    x2 = min(img_w, x2 + 15)
                    y2 = min(img_h, y2 + 15)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    box_w = x2 - x1
                    box_h = y2 - y1
                    covers_half_image = (box_w * box_h) >= (0.5 * img_w * img_h)

                    if covers_half_image and (box_w > 600 or box_h > 600):
                        logger.info(f"Sub-image at index {i} is too large, splitting into smaller sections.")
                        mid_y = y1 + box_h // 2
                        for j, (start_y, end_y) in enumerate([(y1, mid_y), (mid_y, y2)]):
                            sub_enlarged_roi = self.enlarge_image(image[start_y:end_y, x1:x2])
                            sub_output_path = os.path.join(output_folder, f"formula_{i+1}_{j+1}.png")
                            # cv2.imwrite reports failure through its return value.
                            if not cv2.imwrite(sub_output_path, sub_enlarged_roi):
                                raise IOError(f"Failed to write image file '{sub_output_path}'.")
                            extracted_image_paths.append(sub_output_path)
                    else:
                        # The whole crop is enlarged only on this path; the split
                        # path enlarges the halves and never needs it.
                        enlarged_roi = self.enlarge_image(roi)
                        output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                        if not cv2.imwrite(output_path, enlarged_roi):
                            raise IOError(f"Failed to write image file '{output_path}'.")
                        extracted_image_paths.append(output_path)

                except Exception as e:
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []
    def get_sub_img_paths_enhanced_v4(self, image_path, formula_positions, output_folder):
        """Crop, enlarge and save formula boxes directly into ``output_folder``.

        Each box is padded by 20 pixels on every side (clamped to the image
        bounds), enlarged with ``self.enlarge_image`` and written to
        ``<output_folder>/formula_<k>.png`` (``k`` is the 1-based box index).
        Unlike the sibling helpers, no timestamp sub-directory is created, so
        repeated calls with the same ``output_folder`` overwrite earlier files.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that is
            malformed, empty after clamping, or cannot be saved is logged and
            skipped. ``[]`` is returned when the image cannot be loaded or the
            output directory cannot be created.
        """
        try:
            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 20 pixels per side without leaving the image.
                    x1 = max(0, x1 - 20)
                    y1 = max(0, y1 - 20)
                    x2 = min(img_w, x2 + 20)
                    y2 = min(img_h, y2 + 20)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    # Upscale the crop before saving it.
                    enlarged_roi = self.enlarge_image(roi)

                    output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                    # cv2.imwrite reports failure through its return value; only
                    # report paths of files that were really written.
                    if not cv2.imwrite(output_path, enlarged_roi):
                        raise IOError(f"Failed to write image file '{output_path}'.")
                    extracted_image_paths.append(output_path)

                except Exception as e:
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []
    def get_sub_img_paths_enhanced_NoClip(self, image_path, formula_positions, output_folder):
        """Crop formula boxes and save them as PNG files without enlarging or splitting.

        Each box is padded by 5 pixels on every side (clamped to the image
        bounds) and written unchanged to
        ``<output_folder>/<timestamp>/formula_<k>.png``, where ``<timestamp>``
        comes from ``self.get_day_time()`` and ``k`` is the 1-based box index.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that is
            malformed, empty after clamping, or cannot be saved is logged and
            skipped. ``[]`` is returned when the image cannot be loaded or the
            output directory cannot be created.
        """
        try:
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 5 pixels per side without leaving the image.
                    x1 = max(0, x1 - 5)
                    y1 = max(0, y1 - 5)
                    x2 = min(img_w, x2 + 5)
                    y2 = min(img_h, y2 + 5)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    # The crop is saved at its native resolution in this variant.
                    output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                    # cv2.imwrite reports failure through its return value; only
                    # report paths of files that were really written.
                    if not cv2.imwrite(output_path, roi):
                        raise IOError(f"Failed to write image file '{output_path}'.")
                    extracted_image_paths.append(output_path)

                except Exception as e:
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []
    def get_sub_img_paths_enhanced_v3(self, image_path, formula_positions, output_folder):
        """Crop, enlarge and save formula boxes, halving very large boxes along their longer side.

        NOTE(review): a second method named ``get_sub_img_paths_enhanced_v3``
        is defined further down in this class. In Python the later definition
        replaces this one, so this body is not the one callers reach. Rename
        or delete one of the two.

        Each box is padded by 10 pixels horizontally and 15 pixels vertically
        (clamped to the image bounds). A padded box whose area is at least 75%
        of the whole image and whose width or height exceeds 800 pixels is cut
        in two: left/right when it is wider than tall, otherwise top/bottom.
        The halves are saved as ``formula_<k>_1.png`` / ``formula_<k>_2.png``;
        any other box is saved as ``formula_<k>.png``. Every saved crop is
        enlarged with ``self.enlarge_image``. Files go to
        ``<output_folder>/<timestamp>/`` with the timestamp taken from
        ``self.get_day_time()``; ``k`` is the 1-based box index.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that fails is
            logged and skipped. ``[]`` is returned when the image cannot be
            loaded or the output directory cannot be created.
        """
        try:
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad 10 px left/right and 15 px top/bottom without leaving the image.
                    x1 = max(0, x1 - 10)
                    y1 = max(0, y1 - 15)
                    x2 = min(img_w, x2 + 10)
                    y2 = min(img_h, y2 + 15)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    box_w = x2 - x1
                    box_h = y2 - y1
                    covers_most_of_image = (box_w * box_h) >= (0.75 * img_w * img_h)

                    if covers_most_of_image and (box_w > 800 or box_h > 800):
                        logger.info('子图裁剪-----------------------')
                        logger.info(f"Sub-image at index {i} is too large, splitting into smaller sections.")
                        if box_w > box_h:
                            # Wider than tall: cut into a left and a right half.
                            mid_x = x1 + box_w // 2
                            pieces = [image[y1:y2, x1:mid_x], image[y1:y2, mid_x:x2]]
                        else:
                            # Otherwise cut into an upper and a lower half.
                            mid_y = y1 + box_h // 2
                            pieces = [image[y1:mid_y, x1:x2], image[mid_y:y2, x1:x2]]

                        for j, piece in enumerate(pieces, start=1):
                            sub_enlarged_roi = self.enlarge_image(piece)
                            sub_output_path = os.path.join(output_folder, f"formula_{i+1}_{j}.png")
                            # cv2.imwrite reports failure through its return value.
                            if not cv2.imwrite(sub_output_path, sub_enlarged_roi):
                                raise IOError(f"Failed to write image file '{sub_output_path}'.")
                            extracted_image_paths.append(sub_output_path)
                    else:
                        # The whole crop is enlarged only on this path; the split
                        # path enlarges the halves and never needs it.
                        enlarged_roi = self.enlarge_image(roi)
                        output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                        if not cv2.imwrite(output_path, enlarged_roi):
                            raise IOError(f"Failed to write image file '{output_path}'.")
                        extracted_image_paths.append(output_path)
                except Exception as e:
                    logger.error(f"Error processing position {i}: {e}")
            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []

    def Image2Latex(self, img_path, inference_mode='cuda', num_beam=1, mix=True):
        """Post an image to the img2latex service and return its JSON reply.

        The request goes to the fixed endpoint
        ``http://localhost:8816/predict/img2latex`` as multipart form data.

        Parameters:
        img_path (str): Path of the image file to upload.
        inference_mode (str): Sent unchanged as the ``inference_mode`` form field.
        num_beam (int): Sent unchanged as the ``num_beam`` form field.
        mix (bool): Sent as the lowercase string ``"true"`` / ``"false"``.

        Returns:
        dict: The decoded JSON body when the server answers with HTTP 200,
              otherwise ``{'result': ''}``.

        Errors from opening the file, from ``requests.post`` or from decoding
        the JSON body are not caught here and propagate to the caller.
        """
        rec_server_url = "http://localhost:8816/predict/img2latex"
        logger.info('Image2Latex URL needed to certain!')
        with open(img_path, "rb") as image_file:
            files = {"image": image_file}
            data = {
                "inference_mode": inference_mode,
                "num_beam": num_beam,
                "mix": str(mix).lower()  # the form field carries the boolean as text
            }
            response = requests.post(rec_server_url, files=files, data=data)

        if response.status_code == 200:
            # Decode the body once and reuse it for both the log line and the result.
            prediction = response.json()
            logger.info("img2latex成功 预测结果:{}".format(prediction))
            return prediction

        # The previous message had no placeholder, so the response was never logged.
        logger.info('img2latex失败 response={}'.format(response))
        return {'result': ''}
       



    # Goal: stretch every sub-image of a batch to one common width (the method below does not apply this stretch).
    def get_sub_img_paths_enhanced_v3(self, image_path, formula_positions, output_folder):
        """Crop, enlarge and save formula boxes, halving very large boxes.

        Each box is padded by 10 pixels on every side (clamped to the image
        bounds). A padded box that spans at least 75% of the image width or
        height and is more than 600 pixels in both width and height is cut
        into an upper and a lower half, saved as ``formula_<k>_1.png`` and
        ``formula_<k>_2.png``. Any other box is saved as ``formula_<k>.png``.
        Every saved crop is enlarged with ``self.enlarge_image``. Files go to
        ``<output_folder>/<timestamp>/`` with the timestamp taken from
        ``self.get_day_time()``; ``k`` is the 1-based box index.

        A former pre-pass computed a maximum box width by indexing each box as
        ``position[2][2]``. With the flat 4-number boxes this method requires,
        that indexing raised, so every call ended in the outer handler and
        returned ``[]``. The computed width was never used, so the pre-pass
        has been removed.

        Args:
            image_path: Path of the source image, read with ``cv2.imread``.
            formula_positions: Iterable of 4-item boxes ``(x1, y1, x2, y2)``
                whose items are convertible with ``int``.
            output_folder: Base directory for the crops; created when missing.

        Returns:
            Paths of the crops that were actually written. A box that fails is
            logged and skipped. ``[]`` is returned when the image cannot be
            loaded or the output directory cannot be created.
        """
        try:
            logger.info('image_path={},formula_positions={}'.format(image_path, formula_positions))

            # The source image must exist and be decodable.
            if not os.path.exists(image_path):
                logger.info(f"Image file '{image_path}' does not exist.")
                raise FileNotFoundError(f"Image file '{image_path}' does not exist.")

            image = cv2.imread(image_path)
            if image is None:
                logger.info(f"Failed to read the image file '{image_path}'.")
                raise ValueError(f"Failed to read the image file '{image_path}'.")

            # exist_ok avoids the check-then-create race of exists() + makedirs().
            output_folder = os.path.join(output_folder, self.get_day_time())
            os.makedirs(output_folder, exist_ok=True)

            img_h, img_w = image.shape[:2]
            extracted_image_paths = []

            for i, pos in enumerate(formula_positions):
                try:
                    if len(pos) != 4:
                        logger.info(f"Invalid position data at index {i}: {pos}")
                        raise ValueError(f"Invalid position data at index {i}: {pos}")

                    x1, y1, x2, y2 = map(int, pos)

                    # Pad the box by 10 pixels per side without leaving the image.
                    x1 = max(0, x1 - 10)
                    y1 = max(0, y1 - 10)
                    x2 = min(img_w, x2 + 10)
                    y2 = min(img_h, y2 + 10)

                    if x1 >= x2 or y1 >= y2:
                        raise ValueError(f"Invalid coordinates at index {i}: {pos}")

                    roi = image[y1:y2, x1:x2]
                    if roi.size == 0:
                        raise ValueError(f"Empty ROI at index {i}: {pos}")

                    box_w = x2 - x1
                    box_h = y2 - y1
                    spans_most_of_a_side = box_w >= (0.75 * img_w) or box_h >= (0.75 * img_h)

                    if spans_most_of_a_side and (box_w > 600 and box_h > 600):
                        logger.info(f"Sub-image at index {i} is too large, splitting into smaller sections.")
                        mid_y = y1 + box_h // 2
                        for j, (start_y, end_y) in enumerate([(y1, mid_y), (mid_y, y2)]):
                            sub_enlarged_roi = self.enlarge_image(image[start_y:end_y, x1:x2])
                            sub_output_path = os.path.join(output_folder, f"formula_{i+1}_{j+1}.png")
                            # cv2.imwrite reports failure through its return value.
                            if not cv2.imwrite(sub_output_path, sub_enlarged_roi):
                                raise IOError(f"Failed to write image file '{sub_output_path}'.")
                            extracted_image_paths.append(sub_output_path)
                    else:
                        # The whole crop is enlarged only on this path; the split
                        # path enlarges the halves and never needs it.
                        enlarged_roi = self.enlarge_image(roi)
                        output_path = os.path.join(output_folder, f"formula_{i+1}.png")
                        if not cv2.imwrite(output_path, enlarged_roi):
                            raise IOError(f"Failed to write image file '{output_path}'.")
                        extracted_image_paths.append(output_path)

                except Exception as e:
                    logger.error(f"Error processing position {i}: {e}")

            return extracted_image_paths
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return []
    # Make sure that self.get_day_time() and self.enlarge_image() methods are properly defined in your class.
    # 获取毫秒级时间
    def get_millisecond_time(self):
        """Return the current local time as a millisecond-resolution string.

        Returns:
            str: 17 digits laid out as ``YYYYMMDDHHMMSSmmm``, where ``mmm``
            is the millisecond part of the current time.
        """
        now = datetime.now()
        # Microseconds are truncated (not rounded) down to whole milliseconds.
        millis = now.microsecond // 1000
        return f"{now:%Y%m%d%H%M%S}{millis:03d}"
    
    def get_day_time(self):
        """Return the current local date and time as ``YYYYMMDD_HHMMSS``.

        Returns:
            str: Second-resolution timestamp with an underscore between the
            date part and the time part.
        """
        return f"{datetime.now():%Y%m%d_%H%M%S}"

    

    def convert_to_dict(self,all_res):
        """Parse every entry of ``all_res`` as a Python literal.

        Args:
            all_res: Iterable of items, normally strings holding a Python
                literal such as a dict.

        Returns:
            list: One element per input item, in the same order. An item that
            ``ast.literal_eval`` cannot parse is reported on stdout and kept
            unchanged in the output.
        """
        def _parse_or_keep(raw):
            try:
                return ast.literal_eval(raw)
            except Exception as e:
                # Best effort: report the failure and fall back to the raw item.
                print(f"Error converting item: {raw}, error: {e}")
                return raw

        return [_parse_or_keep(raw) for raw in all_res]
    # def save_to_html():
    

    def filter_detection_results(self,S):
        """
        Report whether more than one detection has a confidence above 0.8.

        Args:
            S (list): Detection results; each element is indexed with the key
                'confidence' and that value is compared against 0.8.

        Returns:
            bool: True when at least two elements have a confidence strictly
            greater than 0.8, otherwise False. Only this flag is returned,
            not the filtered elements.
        """
        # Count every element (no short-circuit) so each one is inspected.
        confident_count = sum(1 for detection in S if detection['confidence'] > 0.8)
        return confident_count > 1

    def annotate_image_with_detections(self,original_image_path, output_image_path, detections):
        """
        Draw detection boxes and labels onto an image and write the result.

        Args:
            original_image_path (str): Path of the image to load.
            output_image_path (str): Path where the annotated image is written.
            detections (list): Detection results. Each element provides 'p'
                (a mapping with 'x' and 'y'), 'w', 'h', 'label' and
                'confidence'.

        Returns:
            None

        Note:
            The loaded image is not checked before drawing on it.
        """
        canvas = cv2.imread(original_image_path)

        for det in detections:
            origin = det['p']
            left, top = origin['x'], origin['y']
            box_w, box_h = det['w'], det['h']
            score = det['confidence']

            # Green for detections above the 0.8 threshold, red otherwise
            # (OpenCV colors are given in BGR order).
            if score > 0.8:
                color = (0, 255, 0)
            else:
                color = (0, 0, 255)

            cv2.rectangle(canvas, (left, top), (left + box_w, top + box_h), color, 2)
            caption = f"{det['label']} ({score:.2f})"
            # The caption is placed 10 pixels above the box's top-left corner.
            cv2.putText(canvas, caption, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)

        cv2.imwrite(output_image_path, canvas)

    def formula_detection_fuc(self,img_path):
        """
        Upload an image to the formula-detection service and evaluate the reply.

        The image is POSTed as the multipart field ``img`` to
        ``FORMULA_DETECTION_URL``. The JSON reply is then passed to
        ``self.filter_detection_results``.

        Args:
            img_path (str): Path of the image file to upload.

        Returns:
            tuple: ``(flag, message)``. On success ``flag`` is the value
            returned by ``self.filter_detection_results`` and ``message`` is
            a success text. On a handled failure (request error, non-200
            status, unparsable JSON, or a reply whose structure is not what
            the filter expects) ``flag`` is False and ``message`` describes
            the problem.

        Raises:
            OSError: If ``img_path`` cannot be opened; this is not handled here.
        """
        # The file only needs to stay open while the upload is in flight.
        with open(img_path, 'rb') as img:
            try:
                # NOTE(review): no timeout is passed, so a stalled service
                # blocks this call indefinitely — consider adding one.
                response = requests.post(FORMULA_DETECTION_URL, files={'img': img})
            except requests.exceptions.RequestException as e:
                logger.error(f"请求公式检测服务失败: {e}")
                return False, f"请求公式检测服务失败: {e}"

        if response.status_code != 200:
            logger.error(f"检测服务返回错误: {response.status_code}")
            return False, f"检测服务返回错误: {response.status_code}"

        try:
            response_infos = json.loads(response.text)
        except json.JSONDecodeError as e:
            logger.error(f"解析检测服务返回结果失败: {e}")
            return False, "解析检测服务返回结果失败"

        try:
            has_valid_detections = self.filter_detection_results(response_infos)
        except (KeyError, TypeError) as e:
            # The filter indexes each element with 'confidence'; a reply that
            # is valid JSON but has a different structure fails with KeyError
            # or TypeError, never with JSONDecodeError.
            logger.error(f"解析检测服务返回结果失败: {e}")
            return False, "解析检测服务返回结果失败"

        return has_valid_detections, '检查成功'
    
    def clean_return_results(self,formula_results):
        """Keep only the blocks whose corrected formula passes the number check.

        A block is kept when its 'corrected_formula' text is non-empty and
        contains no standalone number greater than 10.

        Args:
            formula_results: Iterable of blocks, each indexed with the key
                'corrected_formula'.

        Returns:
            list: The blocks that pass the check, in their original order.
        """
        def _has_only_small_numbers(text):
            """Return True for non-empty text with no standalone number above 10.

            Args:
                text (str): LaTeX text to inspect.

            Returns:
                bool: False for empty text or when any standalone number
                exceeds 10; True otherwise.
            """
            if len(text) == 0:
                return False
            # \b...\b matches digit runs that are not glued to letters or
            # underscores, so e.g. the "12" in "x12" is not considered.
            standalone_numbers = re.findall(r'\b\d+\b', text)
            return all(int(token) <= 10 for token in standalone_numbers)

        return [
            block
            for block in formula_results
            if _has_only_small_numbers(block['corrected_formula'])
        ]
    def is_break(self, text):
        """Report whether ``text`` is free of all marker keywords.

        Args:
            text (str): Text to scan.

        Returns:
            bool: False as soon as any marker keyword occurs in ``text``;
            True when none of them occurs.
        """
        # Marker substrings; a single occurrence makes the result False.
        markers = (
            '证明', '解', '例题', '名师经验谈', '解析', '答案',
            'A.', 'B.', 'C.', 'D.',
        )
        return not any(marker in text for marker in markers)

if __name__ == '__main__':
    # Manual smoke run: send one hard-coded sample image through the
    # formula-detection check and log the outcome.
    sample_image = "/data/wangtengbo/formula_TexTeller/TexTeller/src/9c18ed3e78b34fb7b31c9c73d044fcac.jpg"
    checker = Formula_Checker()
    detected, status = checker.formula_detection_fuc(sample_image)

    if detected:
        logger.info("处理成功并生成可视化图像。")
    else:
        logger.info(f"处理结果: {status}")

        
    
