import os
from datetime import datetime
import langid
import numpy as np
from sklearn.cluster import KMeans

def mkdir_if_not_exist(path):
    """
    Create the directory at ``path`` (and any missing parents) if needed.

    The existence check and the creation happen in a single
    ``os.makedirs(..., exist_ok=True)`` call, so a directory created by
    someone else between a separate check and the creation cannot make this
    function fail.

    Parameters:
    path (str): Path of the directory to create.

    Raises:
    FileExistsError: If ``path`` already exists but is not a directory.
    """
    os.makedirs(path, exist_ok=True)

def get_millisecond_time():
    """
    Return the current local time as a compact millisecond-precision stamp.

    Returns:
    str: Digits in the form ``YYYYMMDDHHMMSSmmm``, where ``mmm`` is the
    millisecond part of the current second.
    """
    now = datetime.now()
    # Integer-dividing the microseconds by 1000 gives the leading three
    # digits of the zero-padded six-digit microsecond field.
    millis = now.microsecond // 1000
    return f"{now:%Y%m%d%H%M%S}{millis:03d}"

def get_lang(text):
    """
    Classify ``text`` as English or Chinese using ``langid``.

    The full-width full stop and comma are replaced with spaces before the
    text is handed to the classifier. Any classification other than English
    is reported as Chinese.

    Parameters:
    text (str): Text whose language should be detected.

    Returns:
    str: 'en' for English, 'zh' for everything else.
    """
    cleaned = text
    for mark in ('。', '，'):
        cleaned = cleaned.replace(mark, ' ')

    detected = langid.classify(cleaned)[0]
    if detected == 'en':
        return 'en'
    return 'zh'

def get_day_time():
    """
    Return the current local date and time as a compact string.

    Returns:
    str: The current time formatted as "YYYYMMDD_HHMMSS".
    """
    return f"{datetime.now():%Y%m%d_%H%M%S}"

def merge_ver_boxes(formula_positions):
    """
    Merge bounding boxes whose vertical (y) extents overlap.

    Boxes are visited in ascending ``y_min`` order. A box whose ``y_min`` is
    not greater than the running box's ``y_max`` is folded into the running
    box (the bbox becomes the union of both); otherwise the running box is
    emitted and a new one is started. A merged entry keeps the category and
    confidence of the first box of its group.

    The input sequence is not modified: sorting is done on a copy.

    Parameters:
    formula_positions (list): Entries of the form
        (category, confidence, [x_min, y_min, x_max, y_max]).

    Returns:
    list: Merged entries, each a list [category, confidence, bbox].
    """
    merged_boxes = []
    current_box = None

    # sorted() returns a new list, so the caller's ordering is preserved.
    for category, confidence, bbox in sorted(formula_positions, key=lambda item: item[2][1]):
        x_min, y_min, x_max, y_max = bbox

        if current_box is None:
            current_box = [category, confidence, bbox]
            continue

        curr_x_min, curr_y_min, curr_x_max, curr_y_max = current_box[2]

        if y_min <= curr_y_max:
            # Overlap on the y axis: grow the running box to the union.
            current_box[2] = [
                min(curr_x_min, x_min),
                min(curr_y_min, y_min),
                max(curr_x_max, x_max),
                max(curr_y_max, y_max),
            ]
        else:
            merged_boxes.append(current_box)
            current_box = [category, confidence, bbox]

    if current_box is not None:
        merged_boxes.append(current_box)

    return merged_boxes

def merge_hor_boxes(formula_positions):
    """
    Merge bounding boxes whose horizontal (x) extents overlap.

    Boxes are visited in ascending ``x_min`` order. A box whose ``x_min`` is
    not greater than the running box's ``x_max`` is folded into the running
    box (the bbox becomes the union of both); otherwise the running box is
    emitted and a new one is started. A merged entry keeps the category and
    confidence of the first box of its group.

    The input sequence is not modified: sorting is done on a copy.

    Parameters:
    formula_positions (list): Entries of the form
        (category, confidence, [x_min, y_min, x_max, y_max]).

    Returns:
    list: Merged entries, each a list [category, confidence, bbox].
    """
    merged_boxes = []
    current_box = None

    # sorted() returns a new list, so the caller's ordering is preserved.
    for category, confidence, bbox in sorted(formula_positions, key=lambda item: item[2][0]):
        x_min, y_min, x_max, y_max = bbox

        if current_box is None:
            current_box = [category, confidence, bbox]
            continue

        curr_x_min, curr_y_min, curr_x_max, curr_y_max = current_box[2]

        if x_min <= curr_x_max:
            # Overlap on the x axis: grow the running box to the union.
            current_box[2] = [
                min(curr_x_min, x_min),
                min(curr_y_min, y_min),
                max(curr_x_max, x_max),
                max(curr_y_max, y_max),
            ]
        else:
            merged_boxes.append(current_box)
            current_box = [category, confidence, bbox]

    if current_box is not None:
        merged_boxes.append(current_box)

    return merged_boxes

def merge_boxes_by_clustering(formula_positions, target_num_boxes=4):
    """
    Merge bounding boxes by clustering their centre points with KMeans.

    Each box is represented by its centre; boxes assigned to the same
    cluster are replaced by the smallest box enclosing all of them.

    Parameters:
    formula_positions (list): Entries of the form
        (category, confidence, [x_min, y_min, x_max, y_max]).
    target_num_boxes (int): Desired number of merged boxes. It is capped at
        the number of input boxes, because KMeans cannot form more clusters
        than there are samples.

    Returns:
    list: Tuples (None, None, [x_min, y_min, x_max, y_max]), one per
    non-empty cluster, ordered by cluster label. Empty input yields an
    empty list.
    """
    if len(formula_positions) == 0:
        # Nothing to cluster; KMeans would raise on an empty sample set.
        return []

    n_clusters = min(target_num_boxes, len(formula_positions))

    box_centers = np.array(
        [((bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2) for _, _, bbox in formula_positions]
    )
    labels = KMeans(n_clusters=n_clusters, random_state=0).fit(box_centers).labels_

    # Group the bounding boxes by cluster label in a single pass.
    clusters = {}
    for label, (_, _, bbox) in zip(labels, formula_positions):
        clusters.setdefault(int(label), []).append(bbox)

    merged_boxes = []
    for label in sorted(clusters):
        members = clusters[label]
        merged_bbox = [
            min(bbox[0] for bbox in members),
            min(bbox[1] for bbox in members),
            max(bbox[2] for bbox in members),
            max(bbox[3] for bbox in members),
        ]
        merged_boxes.append((None, None, merged_bbox))

    return merged_boxes

def process_formula_positionsv2(formula_positions, target_num_boxes=4):
    """
    Reduce formula boxes to at most ``target_num_boxes`` merged boxes.

    Vertically overlapping boxes are merged first; the result is then
    clustered so that no more than ``target_num_boxes`` boxes remain.

    Parameters:
    formula_positions (list): Entries of the form
        (category, confidence, [x_min, y_min, x_max, y_max]).
    target_num_boxes (int): Upper bound on the number of returned boxes.

    Returns:
    list: Bounding boxes [x_min, y_min, x_max, y_max]. Empty input yields an
    empty list.
    """
    if not formula_positions:
        # Without boxes the cluster count would be 0, which clustering rejects.
        return []

    # Hand over a shallow copy so the caller's list is never reordered.
    merged_boxes_ver = merge_ver_boxes(list(formula_positions))

    # Never request more clusters than there are boxes left to cluster.
    n_clusters = min(target_num_boxes, len(merged_boxes_ver))

    merged_boxes = merge_boxes_by_clustering(merged_boxes_ver, target_num_boxes=n_clusters)
    return [entry[2] for entry in merged_boxes]

def merge_bounding_boxes(formula_positions):
    """
    Compute the single bounding box that encloses all given boxes.

    Parameters:
    formula_positions (list): Bounding boxes, each indexable as
        [x_min, y_min, x_max, y_max].

    Returns:
    list: [x_min, y_min, x_max, y_max] of the enclosing box, as NumPy scalars.
    """
    boxes = np.array(formula_positions)
    left, top, right, bottom = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    return [left.min(), top.min(), right.max(), bottom.max()]



if __name__ == "__main__":
    # Demo input: entries of the form [category, confidence, [x_min, y_min, x_max, y_max]].
    formula_positions = [
        [[6], 0.0, [1609.0, 199.0, 2127.0, 329.0]],
        [[0], 0.9736799597740173, [225.93232349219818, 1611.367297878681, 2017.0720675821328, 2075.073957172845]],
        [[0], 0.9499262571334839, [1579.0, 2211.0, 1992.0, 2841.0]],
        [[1], 0.4914904832839966, [320.0, 2088.0, 739.0, 2161.0]],
        [[1], 0.4311352074146271, [228.9372284589486, 1553.0413806681088, 726.2493805957434, 1587.8428093167895]],
        [[1], 0.0, [765.0, 972.0, 842.0, 1011.0]],
        [[1], 0.0, [778.0, 1329.0, 848.0, 1368.0]],
        [[1], 0.0, [431.0, 717.0, 634.0, 766.0]],
        [[1], 0.0, [327.0, 1152.0, 554.0, 1195.0]],
        [[1], 0.0, [351.0, 1460.0, 735.0, 1506.0]],
        [[1], 0.0, [1012.0, 517.0, 1352.0, 566.0]],
        [[0], 0.0, [1059.0, 713.0, 1867.0, 874.0]],
        [[1], 0.0, [1062.0, 930.0, 1696.0, 962.0]],
        [[1], 0.0, [1055.0, 1024.0, 1249.0, 1093.0]],
        [[1], 0.0, [1059.0, 1149.0, 1840.0, 1182.0]],
        [[1], 0.0, [1095.0, 1247.0, 1980.0, 1345.0]],
        [[1], 0.0, [1092.0, 1413.0, 1346.0, 1486.0]],
        [[7], 0.4297367036342621, [1976.0, 2958.0, 2067.0, 2994.0]],
        [[0], 0.0, [227.0, 2235.0, 1453.0, 2854.0]],
    ]
    # The entry point defined in this file is process_formula_positionsv2.
    merged_box = process_formula_positionsv2(formula_positions)
    print(merged_box)  # list of merged [x_min, y_min, x_max, y_max] boxes
