import requests
import json
from config.config import APP_ID
from config.config import SECRET_CODE

from loguru import logger

def get_file_content(filePath):
    """Read the file at ``filePath`` in binary mode and return all of its bytes."""
    with open(filePath, mode='rb') as source:
        payload = source.read()
    return payload

class TextinOcr(object):
    """Small client for the Textin ``pdf_to_markdown`` HTTP endpoint."""

    def __init__(self):
        # Base URL that the endpoint path is appended to.
        self.host = 'https://api.textin.com'

    def recognize_pdf2md(self, image_path, options=None, timeout=None):
        """
        Post a local file to the pdf-to-markdown endpoint and return its result.

        The file is read fully into memory and sent as the raw request body;
        ``options`` is sent as URL query parameters.

        :param image_path: path of the file to upload
        :param options: dict of request params; when ``None`` the defaults
            defined in this method are used
        :param timeout: forwarded unchanged to ``requests.post``; the default
            ``None`` keeps the previous behaviour of waiting indefinitely
        :return: ``(markdown, time_cost, detail)`` on success, where
            ``time_cost`` is ``response.elapsed`` in seconds and ``detail`` is
            ``[]`` when the reply carries no ``detail`` entry. On failure
            (non-200 status, or a reply without ``result.markdown``) the tuple
            ``([], '', [])`` is returned.
        :raises OSError: if ``image_path`` cannot be opened or read
        :raises json.JSONDecodeError: if a 200 reply body is not valid JSON

        Exceptions raised by ``requests.post`` itself are not caught here.

        Example of a fuller ``options`` dict:

        options = {
            'pdf_pwd': None,
            'dpi': 144,  # set dpi to 144
            'page_start': 0,
            'page_count': 1000,  # parse up to 1000 pages
            'apply_document_tree': 0,
            'markdown_details': 1,
            'page_details': 0,  # do not include page detail info
            'table_flavor': 'md',
            'get_image': 'none',
            'parse_mode': 'scan',  # parse mode set to scan
        }
        """
        image = get_file_content(image_path)
        if options is None:
            options = {
                'table_flavor': 'md',
                'parse_mode': 'scan',  # use the scan parse mode
                # presumably includes per-page details (the docstring example
                # uses 0 to exclude them) -- TODO confirm against the API docs
                'page_details': 1,
                'markdown_details': 1,
                'apply_document_tree': 1,
                'dpi': 144,  # resolution set to 144 dpi
            }
        url = self.host + '/ai/service/v1/pdf_to_markdown'
        headers = {
            'x-ti-app-id': APP_ID,
            'x-ti-secret-code': SECRET_CODE
        }

        response = requests.post(
            url, data=image, headers=headers, params=options, timeout=timeout
        )
        if response.status_code != 200:
            # The Response repr only shows the status code, so log the body
            # as well: it is where the server explains what went wrong.
            logger.error(
                'TextinOcr 请求失败 ，错误信息={} body={}'.format(response, response.text)
            )
            return [], '', []

        time_cost = response.elapsed.total_seconds()
        result = json.loads(response.text)
        logger.info(f'textln response_time_cost={time_cost}\n\ntextln response=\n{result}')

        # A 200 reply is not guaranteed to carry result.markdown; report that
        # through the same failure tuple instead of raising KeyError/TypeError.
        payload = result.get('result') if isinstance(result, dict) else None
        if not isinstance(payload, dict) or 'markdown' not in payload:
            logger.error('TextinOcr response has no result.markdown, response={}'.format(result))
            return [], '', []

        # 'detail' may be absent depending on the requested options.
        return payload['markdown'], time_cost, payload.get('detail', [])


if __name__ == "__main__":
    # Log in and open "Workbench - Account Settings - Developer Info" to find
    # the app-id/app-secret.
    textin = TextinOcr()

    # recognize_pdf2md returns three values (markdown, elapsed seconds,
    # detail); unpacking only two raised ValueError.
    resp, time_cost, _detail = textin.recognize_pdf2md(image_path='/data/wangtengbo/got_ocr2/infer/QQ图片20240926223216.png')
    print("request time: ", time_cost)
    print(resp)
