import re
import json

def calculate_iou(inner_box, outer_box):
    # 提取内部边界框的四个顶点
    x1, y1 = inner_box[0]
    x2, y2 = inner_box[1]
    x3, y3 = inner_box[2]
    x4, y4 = inner_box[3]

    # 计算内部边界框的最小和最大坐标
    x_min_inner = min(x1, x2, x3, x4)
    y_min_inner = min(y1, y2, y3, y4)
    x_max_inner = max(x1, x2, x3, x4)
    y_max_inner = max(y1, y2, y3, y4)

    # 提取外部边界框的坐标
    x_min_outer, y_min_outer, x_max_outer, y_max_outer = outer_box

    # 计算交集的坐标
    x_min_inter = max(x_min_inner, x_min_outer)
    y_min_inter = max(y_min_inner, y_min_outer)
    x_max_inter = min(x_max_inner, x_max_outer)
    y_max_inter = min(y_max_inner, y_max_outer)

    # 计算交集的宽度和高度
    inter_width = max(0, x_max_inter - x_min_inter)
    inter_height = max(0, y_max_inter - y_min_inter)

    # 计算交集面积
    inter_area = inter_width * inter_height

    # 计算两个边界框的面积
    inner_area = (x_max_inner - x_min_inner) * (y_max_inner - y_min_inner)
    outer_area = (x_max_outer - x_min_outer) * (y_max_outer - y_min_outer)

    # 计算并集面积
    union_area = inner_area + outer_area - inter_area

    # 计算IoU
    iou = inter_area / union_area if union_area != 0 else 0

    return iou



def has_intersection(inner_box, outer_box, threshold=0.1):
    """
    判断inner_box是否与outer_box有交集（IOU > 0.1）。

    参数:
    inner_box (list): 内部边界框，格式为[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
    outer_box (list): 外部边界框，格式为[x_min, y_min, x_max, y_max]
    threshold (float): 判断交集的IOU阈值，默认为0.1

    返回:
    bool: 如果inner_box与outer_box有交集（IOU > threshold）则返回True，否则返回False。
    """
    iou = calculate_iou(inner_box, outer_box)
    return iou > 0

# def perform_re_check(text):
#     """
#     检查文本中是否包含公式。

#     参数:
#     text (str): 要检查的文本

#     返回:
#     bool: 如果包含公式则返回True，否则返回False。
#     """
#     formula_pattern = re.compile(
#         r"([A-Za-z]+\s*=\s*[A-Za-z0-9+\-*/^()]+)|"  # 一般公式，包含多个运算符或字母
#         r"(\b√[A-Za-z0-9]+\b)|"                     # 根号
#         r"(\bΔ\b)|"                                 # Δ
#         r"(\([A-Za-z0-9+\-*/^()]+\)\s*[+\-*/^]\s*\([A-Za-z0-9+\-*/^()]+\))|"  # 复杂括号表达式
#         r"([A-Za-z]*\d*[(x)(y)(z)(a)(b)(c)]*\s*[+\-*/^]+\s*\(?[A-Za-z0-9+\-*/^()]+\)?)"  # 公式中的符号运算
#     )
#     return bool(formula_pattern.search(text))
def perform_re_check(text):
    # 规则1：过滤掉包含仅字母、空格或中文字符的文本
    if re.fullmatch(r'[\u4e00-\u9fa5a-zA-Z\s]+', text):
        return False
    
    # 规则2：过滤掉不带“=”符号的text
    if '=' not in text:
        return False
    
    # 规则3：过滤掉表达式和赋值，例如a=5
    if re.fullmatch(r'[a-zA-Z]+\s*=\s*\d+', text):
        return False
    
    return True
def filter_boxes(ocr_result, layout_result):
    """
    过滤排版检测结果，保留下包含公式的边界框。

    参数:
    ocr_result (dict): OCR识别结果
    layout_result (dict): 布局检测结果

    返回:
    list: 过滤后的排版检测结果
    """
    layout_data = json.loads(json.loads(layout_result['data'])['Ids_Scores_boxes'])
    filtered_layout_boxes = []
    filtered_layout_ocrs=[]
    for layout_box in layout_data:
        layout_coordinates = layout_box[2]
        combined_text = ""
        for ocr_box in ocr_result['data']:
            ocr_coordinates = ocr_box[0]
            ocr_text = ocr_box[1][0]
            if has_intersection(ocr_coordinates, layout_coordinates):
                combined_text += ocr_text + " "
        #print(combined_text)
        if perform_re_check(combined_text.strip()):
            filtered_layout_boxes.append(layout_box)
            filtered_layout_ocrs.append(combined_text)
    
    return filtered_layout_boxes,filtered_layout_ocrs

# # # # 示例使用
# ocr_result = {'errorCode': 0, 'msg': '识别成功', 'data': [[[[132.0, 6.0], [487.0, 6.0], [487.0, 23.0], [132.0, 23.0]], ['1.4.2用空间向量研究距离、夹角问题（一）', 0.9909250140190125]], [[[274.0, 57.0], [348.0, 57.0], [348.0, 77.0], [274.0, 77.0]], ['空间距离', 0.9974817037582397]], [[[50.0, 105.0], [158.0, 105.0], [158.0, 130.0], [50.0, 130.0]], ['知识清单', 0.9985876083374023]], [[[83.0, 148.0], [209.0, 148.0], [209.0, 165.0], [83.0, 165.0]], ['1.点到直线的距离', 0.999904215335846]], [[[84.0, 176.0], [395.0, 176.0], [395.0, 192.0], [84.0, 192.0]], ["已知直线l的方向向量是a，点P#l，P'el，则点", 0.8676444292068481]], [[[83.0, 223.0], [236.0, 223.0], [236.0, 240.0], [83.0, 240.0]], ['P到直线l的距离为d：', 0.932934045791626]], [[[270.0, 227.0], [289.0, 227.0], [289.0, 234.0], [270.0, 234.0]], ['DP', 0.7328389883041382]], [[[83.0, 269.0], [438.0, 269.0], [438.0, 286.0], [83.0, 286.0]], ['两条平行直线间的距离可以转化为点到直线的距离，', 0.9892963767051697]], [[[82.0, 297.0], [209.0, 297.0], [209.0, 313.0], [82.0, 313.0]], ['2.点到平面的距离', 0.9998923540115356]], [[[84.0, 323.0], [567.0, 323.0], [567.0, 340.0], [84.0, 340.0]], ['已知AB为平面α的一条斜线段（点A在平面α内）,n为平面α的法向量，', 0.9887251853942871]], [[[47.0, 358.0], [392.0, 360.0], [392.0, 387.0], [47.0, 385.0]], ['则点B到平面α的距离为d=AB|·cos<AB,n)', 0.9637517929077148]], [[[378.0, 351.0], [438.0, 351.0], [438.0, 372.0], [378.0, 372.0]], ['無·n', 0.6339988112449646]], [[[447.0, 365.0], [570.0, 365.0], [570.0, 382.0], [447.0, 382.0]], ['空间中其他距离', 0.9999088644981384]], [[[49.0, 408.0], [85.0, 408.0], [85.0, 427.0], [49.0, 427.0]], ['问题', 0.9955520629882812]], [[[86.0, 435.0], [296.0, 435.0], [296.0, 452.0], [86.0, 452.0]], ['一般都可以转化为点面距问题.', 0.9974074363708496]], [[[49.0, 479.0], [159.0, 479.0], [159.0, 507.0], [49.0, 507.0]], ['例题讲评', 0.9977942109107971]], [[[91.0, 525.0], [571.0, 525.0], [571.0, 545.0], [91.0, 545.0]], ['例1如图，在棱长为2的正方体ABCD-A,B,C,D,中,E是BC的中点，P', 0.9382723569869995]], [[[52.0, 558.0], [286.0, 558.0], [286.0, 575.0], [52.0, 575.0]], ['是AE上的动点，求DP的最小值', 0.9763669371604919]], [[[84.0, 725.0], [570.0, 727.0], [570.0, 748.0], [84.0, 746.0]], ['练习1在长方体0ABC-0,A,B,C,中，0A=2,AB=3,AA,=2,求0,到', 0.887532114982605]], [[[50.0, 759.0], [159.0, 759.0], [159.0, 779.0], [50.0, 779.0]], ['直线AC的距离.', 0.9977055788040161]]]}
# layout_result = {'errorCode': 0, 'msg': '识别成功', 'data': '{"Ids_Scores_boxes": "[[[1], 0.6768088340759277, [132.82876458235634, 4.867646808112585, 484.25709724895194, 24.110981528087148]], [[1], 0.5721949338912964, [50.41460480158925, 478.7880785462551, 157.3041640894006, 504.33530555645126]], [[1], 0.699893593788147, [53.0, 526.0, 570.0, 576.0]], [[10, 0], 0.6270818710327148, [52.00979196153577, 761.0801100416344, 155.13669618897046, 776.1116098266145]], [[10], 0.0, [86.0, 727.0, 569.0, 747.0]], [[1], 0.8837159276008606, [51.657900767122186, 103.97743590987875, 157.41005498988994, 129.50588956440203]], [[0], 0.0, [50.0, 150.0, 569.0, 451.0]], [[1], 0.0, [276.0, 58.0, 347.0, 76.0]]]", "boxes_num": "8"}'}
# #print(ocr_result['data'][0])
# filtered_layout_boxes,filtered_layout_ocrs = filter_boxes(ocr_result, layout_result)
# print(filtered_layout_ocrs)
# print(filtered_layout_boxes)
# # 测试用例
