import os
from datetime import datetime
import langid
import numpy as np
from sklearn.cluster import KMeans
from collections import Counter
from loguru import logger

def mkdir_if_not_exist(path):
    """
    Create the directory ``path``, including missing parents, if it is absent.

    Args:
        path: Directory path to create.

    The directory is created first and an "already exists" error is ignored,
    instead of checking ``os.path.exists`` beforehand. This removes the window
    in which another process could create the directory between the check and
    the creation. As before, nothing happens when ``path`` already exists.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        # Something already lives at ``path``; same outcome as the old pre-check.
        pass

def get_millisecond_time():
    """
    Return the current local time as a ``YYYYMMDDHHMMSSmmm`` digit string.

    ``%f`` produces six microsecond digits; the last three are cut off so that
    only the millisecond part remains.
    """
    stamp_with_micros = datetime.now().strftime("%Y%m%d%H%M%S%f")
    return stamp_with_micros[:-3]

def get_lang(text):
    """
    Classify ``text`` as English or Chinese using ``langid``.

    The full-width period and comma are replaced by spaces before the text is
    handed to the detector.

    Args:
        text: The string to classify.

    Returns:
        ``'en'`` when the detector reports English; ``'zh'`` for every other
        detected language.
    """
    without_periods = text.replace('。', ' ')
    cleaned = without_periods.replace('，', ' ')
    detected, _score = langid.classify(cleaned)
    if detected == 'en':
        return 'en'
    return 'zh'

def get_day_time():
    """Return the current local date and time formatted as ``YYYYMMDD_HHMMSS``."""
    return datetime.now().strftime("%Y%m%d_%H%M%S")

def merge_ver_boxes(formula_positions):
    """
    Merges bounding boxes whose vertical extents overlap or touch.

    Boxes are visited in ascending ``y_min`` order. A box whose ``y_min`` is
    less than or equal to the ``y_max`` of the group being built is absorbed
    into that group (the group's box grows to enclose both); otherwise it
    starts a new group. Each merged entry keeps the category and confidence
    of the first box of its group.

    The input is read through a sorted copy, so the caller's sequence is left
    in its original order and any sequence type (not only ``list``) works.

    Args:
        formula_positions (list): A list of bounding box data where each item is
                                   in the format (category, confidence, [x_min, y_min, x_max, y_max]).

    Returns:
        list: Merged entries as ``[category, confidence, [x_min, y_min, x_max, y_max]]``,
              ordered by ``y_min``. Empty input gives an empty list.
    """
    # sorted() instead of list.sort(): do not reorder the caller's data.
    ordered = sorted(formula_positions, key=lambda box: box[2][1])

    merged_boxes = []
    for category, confidence, bbox in ordered:
        x_min, y_min, x_max, y_max = bbox

        if merged_boxes:
            current_box = merged_boxes[-1]
            curr_x_min, curr_y_min, curr_x_max, curr_y_max = current_box[2]

            if y_min <= curr_y_max:
                # Overlapping or touching: grow the open group to enclose this box.
                current_box[2] = [
                    min(curr_x_min, x_min),
                    min(curr_y_min, y_min),
                    max(curr_x_max, x_max),
                    max(curr_y_max, y_max),
                ]
                continue

        # First box, or a vertical gap: open a new group.
        merged_boxes.append([category, confidence, bbox])

    return merged_boxes
def merge_hor_boxes(formula_positions):
    """
    Merges horizontally overlapping or adjacent bounding boxes.

    Boxes are visited in ascending ``x_min`` order. A box whose ``x_min`` is
    less than or equal to the ``x_max`` of the group being built is absorbed
    into that group (the group's box grows to enclose both); otherwise it
    starts a new group. Each merged entry keeps the category and confidence
    of the first box of its group.

    The input is read through a sorted copy, so the caller's sequence is left
    in its original order and any sequence type (not only ``list``) works.

    Args:
        formula_positions (list): A list of bounding box data where each item is
                                   in the format (category, confidence, [x_min, y_min, x_max, y_max]).

    Returns:
        list: A list of merged bounding box data as
              ``[category, confidence, [x_min, y_min, x_max, y_max]]``,
              ordered by ``x_min``. Empty input gives an empty list.
    """
    # sorted() instead of list.sort(): do not reorder the caller's data.
    ordered = sorted(formula_positions, key=lambda box: box[2][0])

    merged_boxes = []
    for category, confidence, bbox in ordered:
        x_min, y_min, x_max, y_max = bbox

        if merged_boxes:
            current_box = merged_boxes[-1]
            curr_x_min, curr_y_min, curr_x_max, curr_y_max = current_box[2]

            if x_min <= curr_x_max:
                # Overlapping or touching: grow the open group to enclose this box.
                current_box[2] = [
                    min(curr_x_min, x_min),
                    min(curr_y_min, y_min),
                    max(curr_x_max, x_max),
                    max(curr_y_max, y_max),
                ]
                continue

        # First box, or a horizontal gap: open a new group.
        merged_boxes.append([category, confidence, bbox])

    return merged_boxes

def merge_boxes_by_clustering(formula_positions, target_num_boxes=4):
    """
    Merge bounding boxes by K-Means clustering of their centers.

    Every box is assigned to a cluster according to its center point, and each
    non-empty cluster is replaced by the smallest box enclosing its members.

    Args:
        formula_positions (list): Items in the format
            (category, confidence, [x_min, y_min, x_max, y_max]).
        target_num_boxes (int): Requested number of clusters. It is capped at
            the number of input boxes.

    Returns:
        list: ``(None, None, [x_min, y_min, x_max, y_max])`` tuples, one per
              non-empty cluster in ascending cluster-label order. Category and
              confidence are not carried over. Empty input gives ``[]``.
    """
    if len(formula_positions) == 0:
        # Nothing to cluster; KMeans cannot be fitted on zero samples.
        return []

    bboxes = [bbox for _, _, bbox in formula_positions]

    # Extract bounding box centers as features for clustering
    box_centers = np.array([((b[0] + b[2]) / 2, (b[1] + b[3]) / 2) for b in bboxes])

    # KMeans requires n_clusters <= n_samples, so cap the request.
    n_clusters = min(target_num_boxes, len(bboxes))
    labels = KMeans(n_clusters=n_clusters, random_state=0).fit(box_centers).labels_

    # Group boxes by label in one pass. Labels that received no box (possible
    # when several centers coincide) simply never appear, which avoids calling
    # min()/max() on an empty cluster.
    clusters = {}
    for label, bbox in zip(labels, bboxes):
        clusters.setdefault(int(label), []).append(bbox)

    merged_boxes = []
    for label in sorted(clusters):
        members = clusters[label]
        merged_bbox = [
            min(b[0] for b in members),
            min(b[1] for b in members),
            max(b[2] for b in members),
            max(b[3] for b in members),
        ]
        merged_boxes.append((None, None, merged_bbox))  # Replace None with category and confidence if needed

    return merged_boxes

def merge_horizontal_boxes(formula_positions, num_big_boxes):
    """
    Merge formula boxes, taken from top to bottom, into the requested number of
    big boxes and make sure the big boxes do not overlap vertically.

    The boxes are ordered by ``y_min`` and split into contiguous groups whose
    sizes differ by at most one, so exactly ``min(num_big_boxes, len(formula_positions))``
    big boxes are produced. Each big box encloses its group, takes the category
    of the group's first box and the highest confidence in the group.

    Afterwards, any big box whose ``y_min`` is less than or equal to the previous
    big box's ``y_max`` is translated downward (height preserved) so that it
    starts one unit below the previous box.

    The caller's sequence is not reordered.

    Args:
        formula_positions: List[Tuple[List[int], float, List[float]]]
            Each item is (category_list, confidence, [x_min, y_min, x_max, y_max]).
        num_big_boxes: int
            Requested number of big boxes. Values <= 0 give an empty result.

    Returns:
        List of ``[category, confidence, [x_min, y_min, x_max, y_max]]`` entries
        ordered from top to bottom; ``[]`` for empty input.
    """
    if not formula_positions or num_big_boxes <= 0:
        return []

    # Sort by y_min on a copy so the caller's list keeps its order.
    ordered = sorted(formula_positions, key=lambda box: box[2][1])

    # Balanced contiguous split: the first `remainder` groups get one extra box.
    # (Ceil-sized chunks could leave trailing groups empty and return fewer
    # boxes than requested, e.g. 4 boxes into 3 groups produced only 2.)
    group_count = min(num_big_boxes, len(ordered))
    base_size, remainder = divmod(len(ordered), group_count)

    merged_boxes = []
    start_idx = 0
    for group_index in range(group_count):
        end_idx = start_idx + base_size + (1 if group_index < remainder else 0)
        group = ordered[start_idx:end_idx]
        start_idx = end_idx

        # Seed with the first box, then grow over the rest of the group.
        category, confidence = group[0][0], group[0][1]
        x_min, y_min, x_max, y_max = group[0][2]

        for _, conf, (gx_min, gy_min, gx_max, gy_max) in group[1:]:
            # Keep the highest confidence seen in the group.
            confidence = max(confidence, conf)
            x_min = min(x_min, gx_min)
            y_min = min(y_min, gy_min)
            x_max = max(x_max, gx_max)
            y_max = max(y_max, gy_max)

        merged_boxes.append([category, confidence, [x_min, y_min, x_max, y_max]])

    # Remove vertical overlap between consecutive big boxes. A shifted box is
    # stored back before the next pair is examined, so shifts cascade downward.
    for prev_box, curr_box in zip(merged_boxes, merged_boxes[1:]):
        prev_y_max = prev_box[2][3]
        curr_x_min, curr_y_min, curr_x_max, curr_y_max = curr_box[2]

        if curr_y_min <= prev_y_max:
            # Translate the current box down so that it starts at prev_y_max + 1.
            shift = (prev_y_max + 1) - curr_y_min
            curr_box[2] = [curr_x_min, curr_y_min + shift, curr_x_max, curr_y_max + shift]

    return merged_boxes



def process_formula_positions(formula_positions, target_num_boxes=2):
    """
    Merge formula boxes and return only the resulting bounding boxes.

    The boxes are first merged with ``merge_ver_boxes``; that result is then
    passed to ``merge_horizontal_boxes`` together with ``target_num_boxes``.
    Clustering-based merging (``merge_boxes_by_clustering``) is not applied here.

    Args:
        formula_positions: List[Tuple[List[int], float, List[float]]]
            Position of each formula: category, confidence and bounding box.
        target_num_boxes: int
            Number of big boxes requested from ``merge_horizontal_boxes``.

    Returns:
        List[List[float]]: The bounding boxes of the merged entries, with
        category and confidence dropped.
    """
    vertically_merged = merge_ver_boxes(formula_positions)
    big_boxes = merge_horizontal_boxes(vertically_merged, target_num_boxes)

    # Keep only the bounding-box component of every merged entry.
    formula_boxes = []
    for entry in big_boxes:
        formula_boxes.append(entry[2])
    return formula_boxes

import numpy as np

def merge_bounding_boxes(formula_positions):
    """
    Return the smallest box that encloses every box in ``formula_positions``.

    Args:
        formula_positions: Sequence of boxes, each laid out as ``[x1, y1, x2, y2]``.

    Returns:
        ``[min_x1, min_y1, max_x2, max_y2]`` as plain Python numbers, or ``[]``
        when no boxes are given.
    """
    positions_array = np.asarray(formula_positions)
    if positions_array.size == 0:
        # No boxes: column indexing below would fail on the empty array.
        return []

    # Leftmost/topmost corner from columns 0-1, rightmost/bottommost from columns 2-3.
    top_left = positions_array[:, :2].min(axis=0)
    bottom_right = positions_array[:, 2:4].max(axis=0)

    # tolist() converts NumPy scalars to Python numbers, so the result prints as
    # e.g. [0, 1, 5, 6] on every NumPy version and can be serialized to JSON.
    return [*top_left.tolist(), *bottom_right.tolist()]
    
if __name__ == "__main__":
    # Demo: merge three sample boxes into the single box that encloses them all.
    sample_boxes = [[1, 2, 3, 4], [2, 3, 5, 6], [0, 1, 4, 5]]

    # The enclosing box spans x from 0 to 5 and y from 1 to 6.
    print(merge_bounding_boxes(sample_boxes))
