Commit 6d5baff6 by unknown

init

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os,re,json,sys
__path__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__path__)
sys.path.append(os.path.join(__path__, 'web_search_source'))
import uvicorn
import asyncio
import logging
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from fastapi.exceptions import RequestValidationError
from pydantic import BaseModel
from save_es_database import ESsearch
from web_search_source.web_search_resource import webSearchResource
def setup_logger():
    # Create the logger object
logger = logging.getLogger('AI_planner')
logger.setLevel(logging.INFO)
    # Make sure the log directory exists
__path__ = os.path.dirname(os.path.abspath(__file__))
log_dir = os.path.join(__path__, "log")
if not os.path.exists(log_dir):
os.makedirs(log_dir)
log_file = os.path.join(log_dir, "AI_planner.log")
    # Create a file handler
file_handler = logging.FileHandler(log_file, encoding='utf-8')
file_handler.setLevel(logging.INFO)
    # Console handler (kept disabled; logs go to the file only)
# console_handler = logging.StreamHandler()
# console_handler.setLevel(logging.INFO)
    # Set the log format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)
# console_handler.setFormatter(formatter)
    # Attach the handler
logger.addHandler(file_handler)
# logger.addHandler(console_handler)
return logger
# Create the global logger instance
logger_es = setup_logger()
class Item(BaseModel):
    tableName: str = ""        # SOP table name
    bookCategory: str = ""     # book category: K12, 童书 (children's books), 其他图书 (other books)
    schoolStage: str = ""      # school stage: "初中", "小学", "高中"
    studentGrade: str = ""     # grade
    subjectCategory: str = ""  # subject, e.g. 语文, 数学
    bookVersion: str = ""      # edition, e.g. 人教版, 通用版
    purpose: str = ""          # the target reader's purpose
    keyword: str = ""          # keywords to search for
class ItemWebSearch(BaseModel):
    bookName: str = ""      # book title
    bookClassify: str = ""  # book category
    author: str = ""        # book author
    introduction: str = ""  # book introduction
app = FastAPI()
es_search = ESsearch(hosts=['http://localhost:9200'])
# API endpoints (comment left over from the Chinese-to-Korean translation service)
# 1. Heartbeat check
@app.get("/health/")
async def health():
    res = JSONResponse(status_code=200, content={"message": "no_ai_source service is up."})
return res
# 2. Search ES for non-AI resources
@app.post("/no_ai_source/")
async def no_ai_source(input: Item):
# print("input: ",input)
"""
接收一字典参数,返回es搜索的 非AI资源清单
"""
try:
input = json.loads(input.json())
es_res = es_search.search(input)
logger_es.info(f"ES search no_ai_source input : {input} ; \nes_search : {json.dumps(es_res, ensure_ascii=False, indent=4)}")
res = JSONResponse(status_code=200, content=es_res)
except Exception as e:
logger_es.error(f" no_ai_source input : {input}; error message : {e}")
res = JSONResponse(
status_code=500,
content={"message": str(e)},
)
return res
if __name__ == "__main__":
    uvicorn.run(app="api_service:app", host="0.0.0.0", port=9860, workers=1)  # the deployed service listens on port 9860
# Start the API service on the 116.63.110.220 server
# netstat -ntlp | grep 9860
# cd /home/liuxin/work/AI_planner
# conda activate translate
# nohup python -u api_service.py > log/api_service.log 2>&1 &
# tail -f log/api_service.log
# tail -f log/AI_planner.log
# uvicorn api_service:app --host 0.0.0.0 --port 9860 --workers 1
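# For reference, a minimal client sketch for the two endpoints above. It assumes the
# service is reachable at localhost:9860 (matching the uvicorn.run call); the field
# values are illustrative, not real data.
# import requests
# payload = {"bookCategory": "K12", "schoolStage": "小学", "studentGrade": "二年级",
#            "subjectCategory": "语文", "bookVersion": "人教版",
#            "purpose": "基础学习", "keyword": "阅读;写作"}
# resp = requests.post("http://localhost:9860/no_ai_source/", json=payload)
# print(resp.status_code, resp.json()[:2] if resp.ok else resp.text)  # first two hits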
import re, json, sys , os
import pandas as pd
# Non-AI resource inventory database: convert each Excel row to a JSON string
def knowledge_json(file, sheet_names:list, save_file):
length_record = []
save_jsonl_data = []
for sheet_name in sheet_names:
data = pd.read_excel(file, sheet_name=sheet_name, keep_default_na=False)
data = data.to_dict(orient='records')
for line in data:
if "序号" in line:
line.pop("序号")
try:
line = json.dumps(line, ensure_ascii=False)
line = re.sub("\n", "", line)
line = re.sub("\t", "", line)
length_record.append(len(line))
if len(line) > 3000:
print(line)
save_jsonl_data.append(line)
            except Exception:
print(line)
res = "\n".join(save_jsonl_data)
with open(save_file, 'w', encoding='utf-8') as f:
f.write(res)
print(f"max length: {max(length_record)}")
if __name__ == "__main__":
    file = r"D:\0_shu_chuan_work\work\AI_planner\data\非AI工具.xlsx"
    save_file = r"D:\0_shu_chuan_work\work\AI_planner\data\伴学工具0.txt"
    # Run one sheet at a time; only the last assignment takes effect.
    sheet_names = ['伴学工具20250214']       # max length: 157
    sheet_names = ['第三方自有资源评级详表']  # max length: 429
    sheet_names = ['品牌资源(爱奇艺+慕课)']  # max length: 333
    sheet_names = ['小睿资讯服务']           # max length: 2916
    sheet_names = ['测评库资源']             # max length: 259
knowledge_json(file, sheet_names, save_file)
print("finished.")
import re, json, os, sys
import copy
import pandas as pd
class prepareESdata():
    # Normalize the data from the non-AI resource SOP sheets so it can be saved into the ES database
    def __init__(self):
        self.head2key = []
def clean(self, text):
text = text.strip()
text = re.sub("“", " ", text)
text = re.sub("”", " ", text) # “”
text = re.sub('"', " ", text) # ""'
text = re.sub("'", " ", text)
return text
def add_message(self, message):
add_info = []
if "中考" in message:
add_info.append('初三')
if "幼小衔接" in message:
add_info.append('一年级')
if "五六年级" in message:
add_info.append('五年级')
add_info.append('六年级')
if "中小学" in message:
add_info.append('小学')
add_info.append('初中')
if "初升高" in message:
add_info.append('初三')
if "中小学" in message:
add_info.append('小学')
add_info.append('初中')
if "1年级" in message:
add_info.append('一年级')
if "2年级" in message:
add_info.append('二年级')
if "3年级" in message:
add_info.append('三年级')
if "1-2年级" in message:
add_info.append('一年级')
add_info.append('二年级')
if "3-6年级" in message:
add_info.append('三年级')
add_info.append('四年级')
add_info.append('五年级')
add_info.append('六年级')
if "初升高" in message:
add_info.append('初三')
add_info = list(set(add_info))
add_info = ";".join(add_info)
return add_info
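    # Example of add_message (values follow the rules above; item order varies
    # because of the set() dedupe):
    #   add_message("中小学1-2年级") -> e.g. "小学;初中;一年级;二年级"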
def study_tool(self, excel_path, sheet_name):
        # 1. 伴学工具 (study-companion tools)
head2key1 = {"raysAppId":"raysAppId", "typeCode":"typeCode", "typeName":"resourceName",
"subjectType":"subjectCategory", "title":"resourceDescribe"}
self.head2key.append(head2key1)
key_value = {"bookCategory":"K12", "tableName":"伴学工具"}
data = pd.read_excel(excel_path, sheet_name=sheet_name)
        data = data.fillna("")  # fillna returns a new frame; assign it back
data = data.to_dict(orient='records')
save_data = []
for item in data:
temp = copy.deepcopy(key_value)
for k, v in item.items():
v = str(v)
if v == "NaT":
v = ""
if k in head2key1.keys() :
temp[head2key1[k]] = self.clean( v)
message = [temp['bookCategory'], temp['subjectCategory'], temp['resourceName'], temp['resourceDescribe']]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message + ";" + self.add_message(message)
save_data.append(temp)
return save_data
def third_party_own_resource(self, excel_path, sheet_name):
        # 2. 第三方自有资源评级详表 (third-party owned-resource rating table)
head2key2 = {"资源编号": "typeCode", "创建时间": "createTime", "资源名称": "resourceName", "作品类型": "workType",
"作者名称": "authorName", "资源简介": "resourceDescribe", "图书类型/受众": "bookCategory",
"学段": "schoolStage", "年级": "studentGrade", "学科": "subjectCategory", "内容": "resourceContent",
"版本": "bookVersion","科目/标签":"subjectORlabel", "年级/深度":"gradeDeep", "目的": "purpose",
"学期":"studentSemester", "版本.1":"textBookEdition", "销售价": "bookPrice"
}
self.head2key.append(head2key2)
data = pd.read_excel(excel_path, sheet_name).fillna("")
data = data.to_dict(orient='records')
save_data = []
key_value = {"tableName":"第三方自有资源评级详表"}
for item in data:
temp = copy.deepcopy(key_value)
if "缺" in item['资源简介']:
continue
for k, v in item.items():
v = str(v)
if v == "NaT":
v = ""
if k=='创建时间':
v = str(v)[:10]
if k in head2key2.keys() :
temp[head2key2[k]] = self.clean( v)
message = [temp['bookCategory'], temp['resourceName'], temp['workType'], temp['resourceDescribe'], temp['schoolStage'],
temp['studentGrade'], temp['subjectCategory'], temp['resourceContent'], temp['bookVersion'], temp['subjectORlabel'],
temp['gradeDeep'], temp['purpose'],temp['studentSemester'], temp['textBookEdition'],
]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message+ ";" + self.add_message(message)
save_data.append(temp)
return save_data
def brand_resource(self, excel_path, sheet_name):
        # 3. 品牌资源(爱奇艺+慕课) (brand resources: iQIYI + MOOC)
head2key3 = {"资源编号": "typeCode", "创建时间": "createTime", "作品名称": "resourceName", "作品类型": "workType",
"作者名称": "authorName", "图书类型/受众": "bookCategory", "科目/标签":"subjectORlabel", "年级/深度":"gradeDeep",
"外链销售价": "bookPrice"
}
self.head2key.append(head2key3)
data = pd.read_excel(excel_path, sheet_name).fillna("")
data = data.to_dict(orient='records')
save_data = []
key_value = {"tableName":"品牌资源(爱奇艺+慕课)"}
for item in data:
temp = copy.deepcopy(key_value)
for k, v in item.items():
v = str(v)
if v == "NaT":
v = ""
if k == '创建时间':
v = str(v)[:10]
if k in head2key3.keys() :
temp[head2key3[k]] = self.clean( v)
message = [
temp['resourceName'], temp['workType'], temp['bookCategory'], temp['subjectORlabel'], temp['gradeDeep']
]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message+ ";" + self.add_message(message)
save_data.append(temp)
return save_data
def xiaorui_information(self, excel_path, sheet_name):
        # 4. 小睿资讯服务 (Xiaorui information service)
head2key4 = {"资讯编号": "typeCode", "创建时间": "createTime", "资讯标题(26字内)": "resourceName", "资讯来源":"resourceFrom",
"受众": "audience", "科目/标签": "subjectORlabel", "目的": "purpose", "bookCategory":"bookCategory"
}
self.head2key.append(head2key4)
data = pd.read_excel(excel_path, sheet_name).fillna("")
data = data.to_dict(orient='records')
save_data = []
key_value = {"tableName": "小睿资讯服务"}
for item in data:
temp = copy.deepcopy(key_value)
for k, v in item.items():
v=str(v)
if v == "NaT":
v = ""
                if k == '创建时间':
                    v = str(v)[:10]
if k == "bookCategory" and v not in ["K12", "童书"]:
v = "其他图书"
if k in head2key4.keys() :
temp[head2key4[k]] = self.clean( v)
message = [temp['bookCategory'], temp['resourceName'], temp['resourceFrom'], temp['subjectORlabel'],
temp['audience'], temp['purpose'],
]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message+ ";" + self.add_message(message)
save_data.append(temp)
return save_data
def evaluation_database(self, excel_path, sheet_name):
        # 5. 测评库资源 (assessment-bank resources)
head2key5 = {"资源编号": "typeCode", "创建时间": "createTime",
"作品名称": "resourceName", "资源简介": "resourceDescribe", "科目/标签": "subjectCategory", "年级/深度":"gradeDeep",
"图书类型/受众":"bookCategory"
}
self.head2key.append(head2key5)
data = pd.read_excel(excel_path, sheet_name).fillna("")
data = data.to_dict(orient='records')
save_data = []
key_value = {"tableName": "测评库资源"}
for item in data:
temp = copy.deepcopy(key_value)
for k, v in item.items():
v=str(v)
if v == "NaT":
v = ""
if k == '创建时间':
v = str(v)[:10]
if k == "图书类型/受众":
bookCategory = []
if "K12" in v:
bookCategory.append('K12')
if "童书" in v:
bookCategory.append('童书')
if "大众生活" in v:
bookCategory.append('其他图书')
if "不限" in v:
bookCategory.append('K12')
bookCategory.append('童书')
bookCategory.append('其他图书')
if not bookCategory:
bookCategory.append('其他图书')
bookCategory = list(set(bookCategory))
bookCategory = ";".join(bookCategory)
v = bookCategory
if k in head2key5.keys() :
temp[head2key5[k]] = self.clean( v)
message = [temp['bookCategory'], temp['resourceName'], temp['resourceDescribe'],
temp['subjectCategory'], temp['gradeDeep'],
]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message+ ";" + self.add_message(message)
save_data.append(temp)
return save_data
def existing_tool(self, excel_path, sheet_name):
        # 6. 已有工具 (existing tools; 数独闯关 excluded)
        # Mostly AI-type tools, so this table is not written into the ES database
head2key6 = {"资源编号": "typeCode", "工具名称": "resourceName", "介绍": "resourceDescribe", "科目/标签": "subjectCategory",
"年级/深度": "gradeDeep",
"图书类型/受众": "bookCategory"
}
# # self.head2key.append(head2key6)
data = pd.read_excel(excel_path, sheet_name).fillna("")
data = data.to_dict(orient='records')
save_data = []
key_value = {"tableName": "已有工具(数独闯关不算)"}
for item in data:
temp = copy.deepcopy(key_value)
for k, v in item.items():
v = str(v)
if v == "NaT":
v = ""
if k == '创建时间':
v = str(v)[:10]
if k == "bookCategory":
bookCategory = []
if "K12" in v:
bookCategory.append('K12')
if "童书" in v:
bookCategory.append('童书')
if "大总生活" in v:
bookCategory.append('其他图书')
if "不限" in v:
bookCategory.append('其他图书')
if not bookCategory:
bookCategory.append('其他图书')
bookCategory = ";".join(bookCategory)
v = bookCategory
if k in head2key6.keys():
temp[head2key6[k]] = self.clean( v)
message = [temp['bookCategory'], temp['resourceName'], temp['resourceDescribe'],
temp['subjectCategory'], temp['gradeDeep'],
]
message = [item for item in message if item]
message = list(set(message))
message = ";".join(message)
temp['message'] = message+ ";" + self.add_message(message)
save_data.append(temp)
return save_data
def result(self):
save_sheets = []
save_sheets_name = []
path = r"/home/liuxin/work/AI_planner/data/非AI工具"
        # 1. 伴学工具 (study-companion tools)
excel_path = f"/home/liuxin/work/AI_planner/data/非AI工具/伴学工具-2025-02-14.xlsx"
sheet_name = 'Sheet1'
        study_tool = self.study_tool(excel_path, sheet_name)  # study-companion tools
data = pd.DataFrame(study_tool)
save_sheets.append(copy.deepcopy(data))
save_sheets_name.append("伴学工具")
        study_tool_demo = study_tool[:4]
        print(json.dumps(study_tool_demo, ensure_ascii=False, indent=4))
        # 2. 第三方自有资源评级详表 (third-party owned-resource rating table)
excel_path = f"/home/liuxin/work/AI_planner/data/非AI工具/商务资源SOP管理表read.xlsx"
sheet_name = '第三方自有资源评级详表'
third_party = self.third_party_own_resource( excel_path, sheet_name)
data = pd.DataFrame(third_party)
save_sheets.append(copy.deepcopy(data))
save_sheets_name.append(sheet_name)
        third_party_demo = third_party[:4]
        print(json.dumps(third_party_demo, ensure_ascii=False, indent=4))
        # 3. 品牌资源(爱奇艺+慕课) (brand resources: iQIYI + MOOC)
excel_path = f"/home/liuxin/work/AI_planner/data/非AI工具/商务资源SOP管理表read.xlsx"
sheet_name = '品牌资源(爱奇艺+慕课)'
brand_resource = self.brand_resource( excel_path, sheet_name)
data = pd.DataFrame(brand_resource)
save_sheets.append(copy.deepcopy(data))
save_sheets_name.append(sheet_name)
        brand_resource_demo = brand_resource[:4]
        print(json.dumps(brand_resource_demo, ensure_ascii=False, indent=4))
        # 4. 小睿资讯服务 (Xiaorui information service)
excel_path = f"/home/liuxin/work/AI_planner/data/非AI工具/商务资源SOP管理表read.xlsx"
sheet_name = '小睿资讯服务'
xiaorui_information = self.xiaorui_information(excel_path, sheet_name)
data = pd.DataFrame(xiaorui_information)
save_sheets.append(copy.deepcopy(data))
save_sheets_name.append(sheet_name)
        xiaorui_information_demo = xiaorui_information[:4]
        print(json.dumps(xiaorui_information_demo, ensure_ascii=False, indent=4))
        # 5. 测评库资源 (assessment-bank resources)
excel_path = f"/home/liuxin/work/AI_planner/data/非AI工具/商务资源SOP管理表read.xlsx"
sheet_name = '测评库资源'
evaluation_database = self.evaluation_database(excel_path, sheet_name)
data = pd.DataFrame(evaluation_database)
save_sheets.append(copy.deepcopy(data))
save_sheets_name.append(sheet_name)
        evaluation_database_demo = evaluation_database[:4]
        print(json.dumps(evaluation_database_demo, ensure_ascii=False, indent=4))
        # Save all the non-AI resources into one JSON file
json_data = study_tool + third_party + brand_resource + xiaorui_information + evaluation_database
print(f"准备了{len(json_data)} 条数据写入es数据库.") # 准备了18137 条数据写入es数据库.
file = os.path.join(path, "商务资源SOP管理表es.json")
        with open(file, "w", encoding='utf-8') as f:
            for id, item in enumerate(json_data):
                item['id'] = id
            f.write(json.dumps(json_data, ensure_ascii=False, indent=4))  # assign ids first, then write the list once
        # Save the data into multiple sheets of a single Excel workbook
with pd.ExcelWriter(excel_path[:-5] + "_es" + ".xlsx") as writer:
for sheet_name, data in zip(save_sheets_name, save_sheets):
data.to_excel(writer, sheet_name=sheet_name, index=False)
        # All field names across the five tables
keys_all = []
for source in [study_tool[0], third_party[0], brand_resource[0], xiaorui_information[0], evaluation_database[0]]:
for k,v in source.items():
keys_all.append(k)
keys_all = list(set(keys_all))
print(keys_all) # ['id', 'studentSemester', 'audience', 'studentGrade', 'bookPrice', 'resourceDescribe', 'resourceContent', 'subjectORlabel', 'typeCode', 'tableName', 'gradeDeep', 'bookCategory', 'resourceFrom', 'createTime', 'raysAppId', 'resourceName', 'workType', 'schoolStage', 'bookVersion', 'purpose', 'message', 'authorName', 'subjectCategory', 'textBookEdition']
print(len(keys_all)) # 24
if __name__ == "__main__":
prepare_es_data = prepareESdata()
prepare_es_data.result()
print("\nfinished.")
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import copy
import json,re,sys,os
from elasticsearch import Elasticsearch, helpers # elasticsearch==7.0.0; pandas==2.2.3; numpy==2.0.2
import jieba
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
# Save the non-AI resource JSON data into the ES database
class EsHelper:
def __init__(self, hosts=None):
"""初始化ES连接
Args:
hosts: ES服务器地址列表,默认为None使用已配置的地址
"""
        if hosts is None:
            # earlier variants used dict hosts, e.g. {'host': '116.63.110.220', 'port': 9200}
            hosts = ['http://localhost:9200']  # or http://116.63.110.220:9200
        self.hosts = hosts
        self.es = Elasticsearch(hosts=self.hosts, timeout=1000)
print("self.es.ping(): ", self.es.ping())
def create_index(self, index_name):
"""创建索引
Args:
index_name: es索引名称
settings: 索引设置
mappings: 索引映射
['tableName','bookCategory', 'typeCode', 'raysAppId', 'createTime', 'resourceName', 'subjectCategory', 'studentSemester',
'audience', 'studentGrade', 'resourceDescribe', 'resourceContent', 'subjectORlabel',
'gradeDeep', 'resourceFrom',
'workType', 'schoolStage', 'bookVersion', 'purpose', 'authorName', 'textBookEdition', 'message', 'bookPrice'
]
Returns:
创建结果
"""
body = {}
settings = {
"index": {
"number_of_shards": 1, # 定义索引的分片数量 方便在不同的节点部署
"number_of_replicas": 0, # 定义索引中每个主分片的副本数量
}}
mappings = {
"dynamic": True,
"properties": {
"tableName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bookCategory": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"typeCode": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"raysAppId": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"createTime": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"resourceName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"subjectCategory": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"studentSemester": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"audience": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"studentGrade": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, # 10
"resourceDescribe": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"resourceContent": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"subjectORlabel": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"gradeDeep": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"resourceFrom": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"workType": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"schoolStage": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bookVersion": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"purpose": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"authorName": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}, # 20
"textBookEdition": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"message": {
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"bookPrice": {
"type": "float",
},
}
}
if settings:
body['settings'] = settings
if mappings:
body['mappings'] = mappings
res = self.es.indices.create(index=index_name, body=body, timeout='100s')
print(f"create index {index_name} success.")
return res
def delete_index(self, index_name):
"""删除索引
Args:
index_name: 索引名称
Returns:
删除结果
"""
res = self.es.indices.delete(index=index_name)
print(f"删除索引 {index_name} success.")
return res
def index_exists(self, index_name):
"""判断索引是否存在
Args:
index_name: 索引名称
Returns:
bool: 是否存在
"""
return self.es.indices.exists(index=index_name)
def read_dict(self, file):
        # 1. Read the 中朝大辞典 data (OCR-parsed, then cleaned with GPT-4o)
with open(file, 'r', encoding='utf-8') as f:
data_list = json.loads(f.read())
res = []
for item in data_list:
temp = {}
for k, v in item.items():
if not v:
pass
else:
temp[k] = v
res.append(copy.deepcopy(temp))
print("len(data_list): ", len(data_list))
print("len(res): ", len(res))
return res
def insert_doc(self, index_name, doc_body, doc_id=None):
"""插入文档
Args:
index_name: 索引名称
doc_body: 文档内容
doc_id: 文档ID,可选
Returns:
插入结果
"""
return self.es.index(index=index_name, body=doc_body, id=doc_id)
def insert_doc_batch(self, index_name, doc_batch: list):
        # Bulk-insert into the ES database
actions = []
for idx, body in enumerate(doc_batch):
if 'id' in body:
idx = body['id']
temp = {"_index": index_name, "_id": idx}
temp['_source'] = body
actions.append(temp)
        # Bulk-insert the documents using helpers.bulk
helpers.bulk(self.es, actions)
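    # A minimal bulk-insert sketch (assumes a running ES on localhost:9200 and an
    # index created with create_index above; the documents are illustrative):
    # es = EsHelper()
    # docs = [{"id": 0, "tableName": "伴学工具", "message": "K12;语文"},
    #         {"id": 1, "tableName": "测评库资源", "message": "童书"}]
    # es.insert_doc_batch("no_ai_source", docs)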
def get_doc(self, index_name, doc_id):
"""获取文档
Args:
index_name: 索引名称
doc_id: 文档ID
Returns:
文档内容
"""
return self.es.get(index=index_name, id=doc_id)
def update_doc(self, index_name, doc_id, doc_body):
"""更新文档
Args:
index_name: 索引名称
doc_id: 文档ID
doc_body: 更新的内容
Returns:
更新结果
"""
return self.es.update(index=index_name, id=doc_id, body={'doc': doc_body})
def delete_doc(self, index_name, doc_id):
"""删除文档
Args:
index_name: 索引名称
doc_id: 文档ID
Returns:
删除结果
"""
return self.es.delete(index=index_name, id=doc_id)
def search(self, index_name, query_body):
"""搜索文档
"""
return self.es.search(index=index_name, body=query_body)
class ESsearch():
    # Search the ES database
def __init__(self, hosts=['http://localhost:9200']):
self.es = Elasticsearch(hosts=hosts, timeout=30)
self.index_name = 'no_ai_source'
        self.size_study_tool = 5                 # 伴学工具 (this table is not used)
        self.size_third_party_own_resource = 50  # 第三方自有资源评级详表 (earlier value: 100)
        self.size_brand_resource = 35            # 品牌资源(爱奇艺+慕课)
        self.size_xiaorui_information = 10       # 小睿资讯服务
        self.size_evaluation_database = 5        # 测评库资源 (this table is not used)
        # Query fields: schoolStage (e.g. 小学), subjectCategory (e.g. 语文), studentGrade (e.g. 一年级).
def execute_query(self, query=None, size=None, offset=0):
        # Execute an ES query against the non-AI resource index
if query is None:
query = {"bool": {
"should": [{"match_phrase": {"message": "K12"}}]
}
}
query_body = {"from": offset,
"query": query
}
if size:
query_body['size'] = size
res = self.es.search(index=self.index_name, body=query_body)
res_hits = []
if 'hits' in res and 'hits' in res['hits']:
hits = res['hits']['hits']
for item in hits:
if "_source" in item and item['_source']:
res_hits.append(item['_source'])
        for item in res_hits:
            for k, v in item.items():
                if isinstance(v, str):
                    # Strip straight and curly quotes (replaced with spaces)
                    v = re.sub('"', " ", v)
                    v = re.sub("'", " ", v)
                    v = re.sub("“", " ", v)
                    v = re.sub("”", " ", v)
                    item[k] = v
return res_hits
def search_study_tool(self, input):
        # 1. ES search over 伴学工具 (all of these are K12 resources)
query = {"bool": {"should": [],
"must":[]
}
}
must_query = [
{"match_phrase": {"tableName": "伴学工具"}},
{"match_phrase": {"bookCategory": input['bookCategory']}}
]
should_query = []
for k, v in input.items():
if not v:
continue
if k in [ "subjectCategory"] and v: # 学科 语数外
temp = {"match_phrase": {"message": v}}
must_query.append(temp)
else:
# temp = {"match": {"message": v}}
temp = {"match": {"resourceName": v}}
should_query.append(temp)
query['bool']['should'] = should_query
query['bool']['must'] = must_query
return query
def search_third_party_own_resource(self, input):
        # 2. ES search over 第三方自有资源评级详表
# print("2、es搜索 第三方自有资源评级详表")
# print(input)
"""
input = {
"subjectCategory": "语文",
"schoolStage": "小学",
"studentGrade": "二年级",
"purpose": "基础学习",
"bookVersion": "人教版",
"keyword": "现代汉语;古代汉语;中国文学常识;外国文学常识;阅读;写作;教学"
}
"""
query = {"bool": {"should": [],
"must": []
}
}
must_query = [
{"match_phrase": {"tableName": "第三方自有资源评级详表"}},
{"match_phrase": {"bookCategory": input['bookCategory']}}
]
should_query = []
for k, v in input.items():
if not v:
continue
            if input['bookCategory'] == "K12" and k in ["schoolStage", "studentGrade", "subjectCategory", "bookVersion"] and v:  # bookCategory, not subjectCategory, holds "K12"
v = v.strip().split(";")
v = list(set(v))
for v0 in v:
temp = {"match_phrase": {"message": v0}}
must_query.append(temp)
else:
# temp = {"match": {"message": v}}
temp = {"match": {"resourceName": v}}
should_query.append(temp)
query['bool']['should'] = should_query
query['bool']['must'] = must_query
return query
def search_brand_resource(self, input):
        # 3. ES search over 品牌资源(爱奇艺+慕课)
query = {"bool": {"should": [],
"must": []
}
}
must_query = [
{"match_phrase": {"tableName": "品牌资源(爱奇艺+慕课)"}},
{"match_phrase": {"bookCategory": input['bookCategory']}}
]
should_query = []
for k, v in input.items():
if not v:
continue
            if input['bookCategory'] == "K12" and k in ["schoolStage", "subjectCategory", "studentGrade"] and v:  # bookCategory holds "K12"
v = v.strip().split(";")
v = list(set(v))
for v0 in v:
temp = {"match_phrase": {"message": v0}}
must_query.append(temp)
else:
# temp = {"match": {"message": v}}
temp = {"match": {"resourceName": v}}
should_query.append(temp)
query['bool']['should'] = should_query
query['bool']['must'] = must_query
return query
def search_xiaorui_information(self, input):
        # 4. ES search over 小睿资讯服务
query = {"bool": {"should": [],
"must": []
}
}
must_query = [
{"match_phrase": {"tableName": "小睿资讯服务"}},
{"match_phrase": {"bookCategory": input['bookCategory']}}
]
should_query = []
for k, v in input.items():
if not v:
continue
if input['bookCategory']=="K12" and k in ["schoolStage", "subjectCategory", "studentGrade"] and v:
v = v.strip().split(";")
v = list(set(v))
for v0 in v:
temp = {"match_phrase": {"message": v0}}
must_query.append(temp)
else:
# temp = {"match": {"message": v}}
temp = {"match": {"resourceName": v}}
should_query.append(temp)
query['bool']['should'] = should_query
query['bool']['must'] = must_query
return query
def search_evaluation_database(self, input):
        # 5. ES search over 测评库资源
query = {"bool": {"should": [],
"must": []
}
}
must_query = [
{"match_phrase": {"tableName": "测评库资源"}},
{"match_phrase": {"bookCategory": input['bookCategory']}}
]
should_query = []
for k, v in input.items():
if not v:
continue
if input['bookCategory']=="K12" and k in ["schoolStage", "subjectCategory", "studentGrade"] and v:
v = v.strip().split(";")
v = list(set(v))
for v0 in v:
if not v0:
continue
temp = {"match_phrase": {"message": v0}}
must_query.append(temp)
else:
# temp = {"match": {"message": v}}
temp = {"match": {"resourceName": v}}
should_query.append(temp)
query['bool']['should'] = should_query
query['bool']['must'] = must_query
return query
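    # For reference, each builder above yields a bool query of this shape (an
    # illustrative example for bookCategory="K12", subjectCategory="语文",
    # keyword="阅读"):
    # {"bool": {"must": [{"match_phrase": {"tableName": "测评库资源"}},
    #                    {"match_phrase": {"bookCategory": "K12"}},
    #                    {"match_phrase": {"message": "语文"}}],
    #           "should": [{"match": {"resourceName": "阅读"}}]}}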
def search_no_ai_source(self, input):
query_study_tool = self.search_study_tool(input)
query_third_party_own_resource = self.search_third_party_own_resource(input)
query_brand_resource = self.search_brand_resource(input)
query_xiaorui_information = self.search_xiaorui_information(input)
query_evaluation_database = self.search_evaluation_database(input)
        # K12 books
if input['bookCategory'] == "K12":
            # Run the five queries in parallel
with ThreadPoolExecutor(max_workers=5) as executor:
res1 = executor.submit(self.execute_query, query=query_study_tool, size=self.size_study_tool)
res2 = executor.submit(self.execute_query, query=query_third_party_own_resource, size=self.size_third_party_own_resource)
res3 = executor.submit(self.execute_query, query=query_brand_resource, size=self.size_brand_resource)
res4 = executor.submit(self.execute_query, query=query_xiaorui_information, size=self.size_xiaorui_information)
res5 = executor.submit(self.execute_query, query=query_evaluation_database, size=self.size_evaluation_database)
                # Wait for all tasks and collect the results
res1 = res1.result()
res2 = res2.result()
res3 = res3.result()
res4 = res4.result()
res5 = res5.result()
res = res1 + res2 + res3 + res4 + res5
        # Children's books and other books
else:
            # Run the four queries in parallel
with ThreadPoolExecutor(max_workers=4) as executor:
res2 = executor.submit(self.execute_query, query=query_third_party_own_resource, size=self.size_third_party_own_resource)
res3 = executor.submit(self.execute_query, query=query_brand_resource, size=self.size_brand_resource)
res4 = executor.submit(self.execute_query, query=query_xiaorui_information, size=self.size_xiaorui_information)
res5 = executor.submit(self.execute_query, query=query_evaluation_database, size=self.size_evaluation_database)
                # Wait for all tasks and collect the results
res2 = res2.result()
res3 = res3.result()
res4 = res4.result()
res5 = res5.result()
res = res2 + res3 + res4 + res5
return res
def search(self, input):
        # Search entry point called by the backend
if input['bookCategory'] not in ["K12", "童书"]:
input['bookCategory'] = "其他图书"
if "tableName" not in input.keys() or not input['tableName']:
res = self.search_no_ai_source(input)
elif "tableName" in input.keys() and input['tableName'] =="伴学工具":
query_study_tool = self.search_study_tool(input)
res = self.execute_query( query = query_study_tool, size = self.size_study_tool)
elif "tableName" in input.keys() and input['tableName'] == "第三方自有资源评级详表":
query_third_party_own_resource = self.search_third_party_own_resource(input)
res = self.execute_query(query=query_third_party_own_resource, size=self.size_third_party_own_resource)
elif "tableName" in input.keys() and input['tableName'] == "品牌资源(爱奇艺+慕课)":
query_brand_resource = self.search_brand_resource(input)
res = self.execute_query(query=query_brand_resource, size=self.size_brand_resource)
elif "tableName" in input.keys() and input['tableName'] == "小睿资讯服务":
query_xiaorui_information = self.search_xiaorui_information(input)
res = self.execute_query(query=query_xiaorui_information, size=self.size_xiaorui_information)
elif "tableName" in input.keys() and input['tableName'] == "测评库资源":
query_evaluation_database = self.search_evaluation_database(input)
res = self.execute_query(query=query_evaluation_database, size=self.size_evaluation_database)
else:
res = [{"error":"tableName must in 【伴学工具;第三方自有资源评级详表;品牌资源(爱奇艺+慕课);小睿资讯服务;测评库资源】 "}]
        # Drop empty fields (iterate over a copy so keys can be popped while looping)
        for item in res:
            for k, v in list(item.items()):
                if not v:
                    item.pop(k)
return res
if __name__ == "__main__":
index_name = "no_ai_source" # 非AI资源 数据库
es = EsHelper()
    # es.delete_index(index_name)  # delete the ES index
# print(es.create_index("test_index", {"number_of_shards": 1, "number_of_replicas": 0}))
# print(es.search("test_index", {"query": {"match_all": {}}}))
    # 1. Create a new ES index
    # if es.index_exists(index_name):
    #     es.delete_index(index_name)  # delete the old index
    # create_index_res = es.create_index(index_name)  # build a fresh ES index
    # print('create_index_res: ', create_index_res)
#
    # 2. Insert the data into ES
# file = '/home/liuxin/work/AI_planner/data/非AI工具/商务资源SOP管理表es.json'
# data_list = es.read_dict( file) # 18169
# es.insert_doc_batch( index_name, data_list)
# res = es.get_doc(index_name, 18128)
# print(res)
#
    # 3. Check how many documents are in ES
# count = es.es.count(index_name)
# print(f'{index_name} :{count} ') #
    ###### ES search over the non-AI resource data
es_search = ESsearch()
input = {
"bookCategory": "童书",
"subjectCategory": "语文",
"keyword": "查询方便、词库齐全"
}
hits = es_search.search(input)
# hits = es_search.search_no_ai_source(input)
hits = json.dumps(hits, ensure_ascii=False, indent=4)
print(hits)
print('\nfinished.')
#!/usr/bin/env python3
import asyncio
import base64
import argparse
from playwright.async_api import async_playwright
from fastapi.responses import JSONResponse
import time
import re, json
import uuid
import uvicorn
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel, HttpUrl
from typing import Optional
import requests
import os
import mimetypes
from typing import Dict, Optional, Union, Tuple
from urllib.parse import quote
# from screenshot import capture_screenshot
async def capture_screenshot(url, width=1280, height=800, save_path=None):
"""
Capture a screenshot of a webpage and return as base64 encoded string.
Args:
url (str): The URL to capture
width (int): Viewport width
height (int): Viewport height
Returns:
str: Base64 encoded screenshot data
"""
timestamp = time.time()
timestamp = str(timestamp)
timestamp = re.sub(r"\.", "_", timestamp)
async with async_playwright() as p:
browser = await p.chromium.launch()
page = await browser.new_page(viewport={'width': width, 'height': height})
try:
await page.goto(url, wait_until='networkidle')
except Exception as e:
await page.goto(url, wait_until='load')
screenshot_bytes = await page.screenshot(full_page=True, path=save_path)
await browser.close()
# Convert to base64
base64_screenshot = base64.b64encode(screenshot_bytes).decode('utf-8')
# base64_screenshot = screenshot_bytes
    # Save the image to disk (page.screenshot above already wrote it when save_path is set)
if save_path:
with open(save_path, 'wb') as f:
f.write(screenshot_bytes)
# print(f"Screenshot saved to {args.output}")
return base64_screenshot
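# A minimal standalone usage sketch for capture_screenshot (assumes the Playwright
# Chromium browser is installed, e.g. via `playwright install chromium`; the URL
# is illustrative):
# b64 = asyncio.run(capture_screenshot("https://example.com", save_path="shot.png"))
# print(len(b64), "base64 characters")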
app = FastAPI(title="Screenshot Service")
class Input(BaseModel):
url: str = ""
width: int = 1280
height: int = 800
class ScreenshotResponse(BaseModel):
url: str
base64_image: str
width: int
height: int
class OBSUploader:
def __init__(self, base_url: str = "https://open.raysgo.com", auth_token: Optional[str] = None):
"""
Initialize the OBS uploader.
Args:
base_url: The base URL for the API
auth_token: The authorization token for API access
"""
self.base_url = base_url.rstrip('/')
self.auth_token = auth_token
self.headers = {
'Authorization': f'Bearer {auth_token}' if auth_token else None
}
# Initialize mimetypes
mimetypes.init()
def _get_content_type(self, file_path: Union[str, bytes]) -> Tuple[str, bytes]:
"""
Get content type and file content from file path or bytes.
Args:
file_path: Path to the file or file content as bytes
Returns:
Tuple of (content_type, file_content)
"""
if isinstance(file_path, str):
if not os.path.exists(file_path):
raise FileNotFoundError(f"File not found: {file_path}")
content_type, _ = mimetypes.guess_type(file_path)
with open(file_path, 'rb') as f:
file_content = f.read()
else:
file_content = file_path
# For bytes input, try to detect type from first few bytes
content_type = 'application/octet-stream' # Default content type
return content_type or 'application/octet-stream', file_content
def get_upload_url(self, biz_code: str, object_name: str, content_type: str) -> Dict:
"""
Get a temporary upload URL for the specified object.
Args:
biz_code: Business code for the upload
object_name: Name/path of the object to upload
content_type: MIME type of the file
Returns:
Dict containing the upload URL and related information
"""
endpoint = f"{self.base_url}/aimodel/v1.0/obs/getCreatePostSignature"
params = {
'bizCode': biz_code,
'objectName': object_name,
'mimeType': content_type
}
response = requests.get(endpoint, params=params, headers=self.headers)
response.raise_for_status()
return response.json()
def upload_file(self, file_path: Union[str, bytes], biz_code: str, object_name: str) -> Dict:
"""
Upload a file using temporary credentials.
Args:
file_path: Path to the file to upload or file content as bytes
biz_code: Business code for the upload
object_name: Name/path of the object to upload
Returns:
Dict containing the upload result and file URL
"""
# Get content type and file content
content_type, file_content = self._get_content_type(file_path)
# Get temporary upload URL with content type
upload_info = self.get_upload_url(biz_code, object_name, content_type)
if upload_info['errCode'] != 0:
raise Exception(f"Failed to get upload URL: {upload_info['message']}")
upload_url = upload_info['data']['temporarySignatureUrl']
# Upload the file with the correct content type
headers = {
'Content-Type': content_type,
'Content-Length': str(len(file_content))
}
response = requests.put(upload_url, data=file_content, headers=headers)
response.raise_for_status()
return {
'success': True,
'file_url': upload_info['data']['domain'] + '/' + object_name,
'object_url_map': upload_info['data']['objectUrlMap']
}
@app.post("/screenshot/")
async def get_screenshot(input: Input):
save_images_path = "images"
if not os.path.exists(save_images_path):
        # Create the directory tree (parent directories handled automatically)
os.makedirs(save_images_path)
file_md5 = uuid.uuid4().hex
try:
base64_image = await capture_screenshot(input.url, width=input.width, height=input.height)
res = {}
        # Decode and save the base64 image
base64_image = base64.b64decode(base64_image)
save_file = f"screenshot_{file_md5}.jpg"
save_file = os.path.join(save_images_path, save_file)
with open(save_file, "wb") as f:
f.write(base64_image)
        uploader = OBSUploader(auth_token="dcg-4c1e3a7f4fcd415e8c93151ff539d20a")  # NOTE: hardcoded auth token
        # Upload the file so the screenshot can be viewed in a browser
try:
result = uploader.upload_file(
file_path=save_file,
biz_code="test",
object_name=f"screenshot/{uuid.uuid4().hex}.jpg"
)
print(f"File uploaded successfully! URL: {result['file_url']}")
res["obs_url"] = result["file_url"]
except Exception as e:
print(f"Upload failed: {str(e)}")
res["obs_url"] = ''
res = JSONResponse(status_code=200, content=res)
    except Exception as e:
        # res may not be defined yet if capture_screenshot itself failed
        res = JSONResponse(status_code=500, content={"message": str(e), "obs_url": ""})
return res
@app.get("/")
async def root():
return {
"service": "Screenshot Service",
"usage": "GET /screenshot?url=https://example.com&width=1280&height=800"
}
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=19801)
# Take a screenshot of a specified web page
# Deployment server: 116.63.110.220
# sudo docker run -itd --name playwright -p 19801:19801 -v /home/liuxin/work:/home/work playwright:v1.2 /bin/bash
# sudo docker exec -it playwright bash
# cd /home/work/AI_planner/screenshot
# nohup python -u screenshot_service.py > screenshot_service.log 2>&1 &  # start the service
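# An example request against the service above (port follows the uvicorn.run call;
# the URL is illustrative):
# curl -X POST http://localhost:19801/screenshot/ \
#      -H "Content-Type: application/json" \
#      -d '{"url": "https://example.com", "width": 1280, "height": 800}'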
import requests
import json
from urllib.parse import quote
import base64
from PIL import Image
import io
# API endpoint URL (replace with the actual service address)
# api_url = "http://116.63.110.220:19801/screenshot"
api_url = "http://localhost:19801/screenshot"
# Request parameters
url = "https://www.icourse163.org/course/WUST-1206144803?from=searchPage&outVendor=zw_mooc_pcssjg_"
url = "https://www.baidu.com/"
params = {
    "url": url,      # target URL to capture
    "width": 1290,   # viewport width
    "height": 700    # viewport height
}
try:
    # Send a POST request (the endpoint expects a JSON body)
    response = requests.post(api_url, json=params)
    # Check the response status code
if response.status_code == 200:
data = response.json()
obs_url = data['obs_url']
print("目标网站截屏保存地址:", obs_url)
else:
print("error")
except Exception as e:
print(f"发生异常: {str(e)}")
#!/usr/bin/env python3
import asyncio
import base64
import argparse
from playwright.async_api import async_playwright
from fastapi.responses import JSONResponse
import time
import re, json
import uvicorn
from fastapi import FastAPI, Query, HTTPException
from pydantic import BaseModel, HttpUrl
from typing import Optional
# from screenshot import capture_screenshot
async def capture_screenshot(url, width=1280, height=800, save_path=None):
"""
Capture a screenshot of a webpage and return as base64 encoded string.
Args:
url (str): The URL to capture
width (int): Viewport width
height (int): Viewport height
Returns:
str: Base64 encoded screenshot data
"""
timestamp = time.time()
timestamp = str(timestamp)
timestamp = re.sub(r"\.", "_", timestamp)
async with async_playwright() as p:
browser = await p.chromium.launch()
page = await browser.new_page(viewport={'width': width, 'height': height})
try:
await page.goto(url, wait_until='networkidle')
except Exception as e:
await page.goto(url, wait_until='load')
screenshot_bytes = await page.screenshot(full_page=True, path=save_path)
await browser.close()
# Convert to base64
base64_screenshot = base64.b64encode(screenshot_bytes).decode('utf-8')
# base64_screenshot = screenshot_bytes
    # Save the image to disk
if save_path:
with open(save_path, 'wb') as f:
f.write(screenshot_bytes)
# print(f"Screenshot saved to {args.output}")
return base64_screenshot
app = FastAPI(title="Screenshot Service")
class Input(BaseModel):
url: str = ""
width: int = 1280
height: int = 800
class ScreenshotResponse(BaseModel):
url: str
base64_image: str
width: int
height: int
@app.post("/screenshot/")
async def get_screenshot(input: Input):
try:
input = json.loads(input.json())
url = input['url']
width = input['width']
height = input['height']
base64_image = await capture_screenshot(url, width=width, height=height)
res = {"base64_image":base64_image, "url":url, "width":width, "height":height}
res = JSONResponse(status_code=200, content=res)
except Exception as e:
res = JSONResponse(status_code=500, content={"message": str(e)} )
return res
@app.get("/")
async def root():
return {
"service": "Screenshot Service",
"usage": "GET /screenshot?url=https://example.com&width=1280&height=800"
}
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=19801)
# Take a screenshot of a specified web page
# sudo docker run -itd --name playwright -p 19801:19801 -v /home/liuxin/work:/home/work playwright:v1.1 /bin/bash
# sudo docker exec -it playwright bash
# cd /home/work/AI_planner/screenshot
# python web_server.py  # start the service