__author__ = 'liying'
__date__ = 'May 2025'
__copyright__ = '(C) 2025, liying'

import os
import time

import psycopg2
import matplotlib.pyplot as plt
from matplotlib import font_manager

import siwei_config  # project-specific configuration, accessed like a nested mapping below

from PyQt5.QtWidgets import QDialog, QVBoxLayout
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from qgis.core import (
    QgsProcessingAlgorithm,
    QgsProcessingParameterProviderConnection,
    QgsProcessingParameterDatabaseSchema,
    QgsProcessingParameterString,
    QgsProcessingParameterFile,
    QgsProcessingParameterBoolean
)
from qgis.utils import iface


# Configure matplotlib so that Chinese labels render correctly
def set_matplotlib_font():
    try:
        font_path = 'C:/Windows/Fonts/msyh.ttc'
        prop = font_manager.FontProperties(fname=font_path)
        plt.rcParams['font.family'] = prop.get_name()
    except Exception as e:
        print(f"字体设置失败:{e}")
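
# The statistics code below repeatedly converts raw byte counts to megabytes with
# round(x / 1024 / 1024, 2).  A small module-level helper such as the one sketched
# here could centralise that conversion; the name format_mb is hypothetical and
# nothing in the class below calls it.
def format_mb(num_bytes):
    """Return a byte count as megabytes rounded to two decimals (sketch only)."""
    return round(num_bytes / 1024 / 1024, 2)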
class DataStorageStatistics(QgsProcessingAlgorithm):
    DATABASE = 'DATABASE'
    SCHEMA = 'SCHEMA'
    TABLE = 'TABLE'
    EXPORT_DIR = 'EXPORT_DIR'
    EXPORT_CHARTS = 'EXPORT_CHARTS'

    def initAlgorithm(self, config=None):
        self.addParameter(QgsProcessingParameterProviderConnection(
            self.DATABASE, '数据库连接', 'postgres',
            defaultValue=siwei_config['db']['host']))
        self.addParameter(QgsProcessingParameterDatabaseSchema(
            self.SCHEMA, '模式', connectionParameterName=self.DATABASE,
            defaultValue='base'))
        self.addParameter(QgsProcessingParameterString(
            self.TABLE, '表名', defaultValue='t_vector_storage'))
        self.addParameter(QgsProcessingParameterFile(
            self.EXPORT_DIR, '图表导出目录',
            behavior=QgsProcessingParameterFile.Folder, optional=True))
        self.addParameter(QgsProcessingParameterBoolean(
            self.EXPORT_CHARTS, '是否导出图表', defaultValue=True))

    def processAlgorithm(self, parameters, context, feedback):
        connection_params = {
            'host': siwei_config['db']['host'],
            'port': siwei_config['db']['port'],
            'dbname': siwei_config['db']['name'],
            'user': siwei_config['db']['user'],
            'password': siwei_config['db']['password'],
        }

        schema = self.parameterAsString(parameters, self.SCHEMA, context)
        table = self.parameterAsString(parameters, self.TABLE, context)
        full_table = f'"{schema}"."{table}"'
        export_dir = self.parameterAsString(parameters, self.EXPORT_DIR, context) or os.path.expanduser(
            '~/qgis_stat_exports')
        os.makedirs(export_dir, exist_ok=True)
        export_charts = self.parameterAsBoolean(parameters, self.EXPORT_CHARTS, context)

        conn = psycopg2.connect(**connection_params)
        conn.autocommit = True
        cursor = conn.cursor()

        # Frequency counts for the three classification fields
        fields = ['ywlx', 'sjlx', 'glbm']
        stat_results = {}
        for field in fields:
            feedback.pushInfo(f"正在统计字段:{field}")
            cursor.execute(f"""
                SELECT {field}, COUNT(*)
                FROM {full_table}
                WHERE {field} IS NOT NULL
                GROUP BY {field}
                ORDER BY COUNT(*) DESC
            """)
            data = cursor.fetchall()
            stat_results[field] = data
            for row in data:
                feedback.pushInfo(f"{row[0]}: {row[1]}")

        feedback.pushInfo("正在统计数据文件大小...")
        size_map = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
        found_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
        missing_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}

        # Handle osgb-type records separately: their size comes from files on disk
        feedback.pushInfo("正在处理 osgb 类型数据...")
        cursor.execute(f"""
            SELECT id, sjywz
            FROM {full_table}
            WHERE sjlx = 'osgb' AND sjywz IS NOT NULL
        """)
        osgb_rows = cursor.fetchall()
        feedback.pushInfo(f"找到 {len(osgb_rows)} 条 osgb 数据记录")

        # Accumulate the total size of all osgb records
        total_osgb_size = 0
        osgb_files_count = 0
        missing_osgb_files = 0
        osgb_file_sizes = []  # per-file sizes, used for the "largest files" report below

        for osgb_id, sjywz in osgb_rows:
            if not sjywz:
                missing_osgb_files += 1
                feedback.pushWarning(f"ID为 {osgb_id} 的 osgb 记录 sjywz 字段为空")
                continue
            try:
                # Skip records whose path does not exist on disk
                if not os.path.exists(sjywz):
                    missing_osgb_files += 1
                    feedback.pushWarning(f"找不到 osgb 文件路径: {sjywz}")
                    continue

                # If sjywz is a directory, sum the sizes of every file under it
                if os.path.isdir(sjywz):
                    folder_size = 0
                    for dirpath, dirnames, filenames in os.walk(sjywz):
                        for filename in filenames:
                            file_path = os.path.join(dirpath, filename)
                            if os.path.exists(file_path):
                                folder_size += os.path.getsize(file_path)
                    file_size = folder_size
                    feedback.pushInfo(f"OSGB文件夹 {sjywz} 总大小: {round(file_size / 1024 / 1024, 2)} MB")
                else:
                    # Otherwise sjywz is a single file; take its size directly
                    file_size = os.path.getsize(sjywz)
                    feedback.pushInfo(f"OSGB文件 {sjywz} 大小: {round(file_size / 1024 / 1024, 2)} MB")

                total_osgb_size += file_size
                osgb_files_count += 1
                osgb_file_sizes.append((sjywz, file_size))
            except Exception as e:
                missing_osgb_files += 1
                feedback.pushWarning(f"读取 osgb 文件 {sjywz} 失败: {str(e)}")

        # Record the osgb totals
        size_map['osgb'] = total_osgb_size
        found_tables['osgb'] = osgb_files_count
        missing_tables['osgb'] = missing_osgb_files

        # Report the osgb statistics
        feedback.pushInfo("\nOSGB 文件统计结果:")
        feedback.pushInfo(f"总文件数: {osgb_files_count}")
        feedback.pushInfo(f"缺失文件数: {missing_osgb_files}")
        feedback.pushInfo(f"总大小: {round(total_osgb_size / 1024 / 1024, 2)} MB")

        # Report the largest osgb files
        if osgb_file_sizes:
            osgb_file_sizes.sort(key=lambda x: x[1], reverse=True)
            feedback.pushInfo("\n最大的5个 OSGB 文件:")
            for i, (file_path, size) in enumerate(osgb_file_sizes[:5], 1):
                feedback.pushInfo(f"{i}. {file_path}: {round(size / 1024 / 1024, 2)} MB")

        # Handle the other data types, which are stored as database relations
        cursor.execute(f"""
            SELECT sjlx, name, sjywz
            FROM {full_table}
            WHERE sjlx IN ('vector', 'raster', 'table') AND sjlx != 'osgb'
        """)
        rows = cursor.fetchall()

        for sjlx, name, sjywz in rows:
            try:
                if sjlx in ['vector', 'raster']:
                    if not name:
                        continue
                    table_name_candidates = self.get_table_name_candidates(name)
                else:  # sjlx == 'table'
                    if not sjywz:
                        continue
                    table_name_candidates = self.get_table_name_candidates(sjywz)

                size_found = False
                for full_target in table_name_candidates:
                    try:
                        cursor.execute("SELECT pg_total_relation_size(%s);", (full_target,))
                        size = cursor.fetchone()
                        if size and size[0]:
                            size_map[sjlx] += size[0]
                            found_tables[sjlx] += 1
                            size_found = True
                            break
                    except Exception:
                        continue

                if not size_found:
                    missing_tables[sjlx] += 1
                    feedback.pushWarning(f"无法找到表:{name if sjlx in ['vector', 'raster'] else sjywz}")
            except Exception as e:
                feedback.pushInfo(f"处理表时出错:{name if sjlx in ['vector', 'raster'] else sjywz},错误信息:{str(e)}")

        feedback.pushInfo("\n表大小统计结果:")
        data_type_names = {
            'vector': '矢量数据',
            'raster': '栅格数据',
            'table': '附件资料',
            'osgb': '三维数据'
        }
        for sjlx in size_map:
            feedback.pushInfo(
                f"{data_type_names[sjlx]}: 找到 {found_tables[sjlx]} 个表/文件,"
                f"缺失 {missing_tables[sjlx]} 个,总大小 {round(size_map[sjlx] / 1024 / 1024, 2)} MB")

        conn.close()

        # Chart rendering
        set_matplotlib_font()
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        fig, axes = plt.subplots(2, 2, figsize=(14, 12))
        field_names = {'ywlx': '业务类型', 'sjlx': '数据类型', 'glbm': '管理部门'}

        for idx, field in enumerate(fields):
            data = stat_results.get(field, [])
            labels = [str(r[0]) for r in data]
            sizes = [r[1] for r in data]
            ax = axes[idx // 2][idx % 2]
            if field == 'ywlx':
                self.plot_donut(ax, labels, sizes, f"{field_names[field]} 分布")
            elif field == 'glbm':
                self.plot_line(ax, labels, sizes, f"{field_names[field]} 分布")
            else:
                self.plot_pie(ax, labels, sizes, f"{field_names[field]} 分布")

        size_labels = ['矢量数据', '栅格数据', '附件资料', '三维数据']
        original_keys = ['vector', 'raster', 'table', 'osgb']
        size_values = [round(size_map[key] / 1024 / 1024, 2) for key in original_keys]
        self.plot_bar(axes[1][1], size_labels, size_values, "数据类型大小 (MB)")

        plt.suptitle("数据字段分布与存储大小统计", fontsize=16)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])

        if export_charts:
            png_path = os.path.join(export_dir, f"data_storage_stats_{timestamp}.png")
            plt.savefig(png_path, dpi=300)
            feedback.pushInfo(f"图表已保存至:{png_path}")

        self.show_matplotlib_dialog(fig)
        return {'结果': '统计完成'}

    def get_table_name_candidates(self, table_ref):
        candidates = [table_ref]
        if '.' in table_ref:
            parts = table_ref.split('.')
            if len(parts) == 2:
                candidates.extend([
                    f'"{parts[0]}"."{parts[1]}"',
                    f'{parts[0]}."{parts[1]}"',
                    f'"{parts[0]}".{parts[1]}'
                ])
        if '_' in table_ref:
            candidates.append(table_ref.split('_', 1)[1])
        if table_ref.startswith('hhht_'):
            candidates.append(table_ref[5:])
        return list(set(candidates))
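
    # Optional sketch, not called by processAlgorithm above: the candidate-probing
    # loop there could be factored into a helper like this one.  The method name
    # _relation_size_bytes is hypothetical; it wraps the same
    # pg_total_relation_size() query and returns None when no candidate resolves.
    def _relation_size_bytes(self, cursor, table_ref):
        for candidate in self.get_table_name_candidates(table_ref):
            try:
                cursor.execute("SELECT pg_total_relation_size(%s);", (candidate,))
                row = cursor.fetchone()
                if row and row[0]:
                    return row[0]
            except Exception:
                continue
        return None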
    def plot_donut(self, ax, labels, sizes, title):
        if not sizes or sum(sizes) == 0:
            ax.axis('off')
            ax.set_title(f"{title}\n(无数据)")
            return
        if len(labels) > 6:
            others_sum = sum(sizes[5:])
            labels = labels[:5] + ["其他"]
            sizes = sizes[:5] + [others_sum]
        wedges, texts, autotexts = ax.pie(
            sizes, labels=labels, autopct='%1.1f%%', startangle=90,
            wedgeprops=dict(width=0.4), textprops={'fontsize': 9})
        for i, autotext in enumerate(autotexts):
            autotext.set_text(f'{sizes[i]}')
        ax.set_title(title)
        ax.axis('equal')
        ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)],
                  loc="best", fontsize=8)

    def plot_pie(self, ax, labels, sizes, title):
        if not sizes or sum(sizes) == 0:
            ax.axis('off')
            ax.set_title(f"{title}\n(无数据)")
            return
        if len(labels) > 6:
            others_sum = sum(sizes[5:])
            labels = labels[:5] + ["其他"]
            sizes = sizes[:5] + [others_sum]
        wedges, texts, autotexts = ax.pie(
            sizes, labels=labels, autopct='%1.1f%%', startangle=90,
            textprops={'fontsize': 9})
        for i, autotext in enumerate(autotexts):
            autotext.set_text(f'{sizes[i]}')
        ax.set_title(title)
        ax.axis('equal')
        ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)],
                  loc="best", fontsize=8)

    def plot_line(self, ax, labels, sizes, title):
        if not sizes:
            ax.axis('off')
            ax.set_title(f"{title}\n(无数据)")
            return
        ax.plot(labels, sizes, marker='o', linestyle='-', color='teal')
        ax.set_title(title)
        ax.set_xlabel("管理部门")
        ax.set_ylabel("数量")
        for i, (x, y) in enumerate(zip(labels, sizes)):
            ax.annotate(f'{y}', (x, y), textcoords="offset points",
                        xytext=(0, 5), ha='center', fontsize=9)
        ax.tick_params(axis='x', rotation=30)

    def plot_bar(self, ax, labels, sizes, title):
        if not sizes or sum(sizes) == 0:
            ax.axis('off')
            ax.set_title(f"{title}\n(无数据)")
            return
        bars = ax.bar(labels, sizes, color='cornflowerblue')
        ax.set_title(title)
        ax.set_xlabel('数据类型')
        ax.set_ylabel('大小 (MB)')
        for bar in bars:
            height = bar.get_height()
            ax.annotate(f'{height:.2f} MB',
                        xy=(bar.get_x() + bar.get_width() / 2, height),
                        xytext=(0, 3), textcoords="offset points",
                        ha='center', va='bottom', fontsize=9)

    def show_matplotlib_dialog(self, fig):
        dialog = QDialog(iface.mainWindow())
        dialog.setWindowTitle("统计图表")
        layout = QVBoxLayout()
        canvas = FigureCanvas(fig)
        layout.addWidget(canvas)
        dialog.setLayout(layout)
        dialog.resize(1200, 900)
        dialog.exec_()

    def name(self):
        return 'DataStorageStatistics'

    def displayName(self):
        return '数据统计'

    def group(self):
        return '数据分析工具'

    def groupId(self):
        return 'data_storage_analysis'

    def createInstance(self):
        return DataStorageStatistics()
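
# Registration sketch: how an algorithm like this typically becomes visible in the
# Processing toolbox.  This is a minimal illustration under assumptions, not part of
# the tool itself; the provider class and its id ('siwei_tools') and display name are
# hypothetical placeholders.  A real plugin would register its provider from
# initGui()/initProcessing() rather than at import time, hence the commented-out
# final line.
from qgis.core import QgsApplication, QgsProcessingProvider


class SiweiToolsProviderSketch(QgsProcessingProvider):
    """Hypothetical provider exposing DataStorageStatistics (sketch only)."""

    def id(self):
        return 'siwei_tools'

    def name(self):
        return 'Siwei Tools'

    def loadAlgorithms(self):
        self.addAlgorithm(DataStorageStatistics())


# QgsApplication.processingRegistry().addProvider(SiweiToolsProviderSketch())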