|
@@ -0,0 +1,368 @@
|
|
|
+__author__ = 'liying'
|
|
|
+__date__ = 'May 2025'
|
|
|
+__copyright__ = '(C) 2025, liying'
|
|
|
+
|
|
|
+import os
|
|
|
+import time
|
|
|
+import psycopg2
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+from matplotlib import font_manager
|
|
|
+import numpy as np
|
|
|
+import siwei_config
|
|
|
+
|
|
|
+from PyQt5.QtWidgets import QDialog, QVBoxLayout
|
|
|
+from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
|
|
|
+
|
|
|
+from qgis.core import (
|
|
|
+ QgsProcessingAlgorithm,
|
|
|
+ QgsProcessingParameterProviderConnection,
|
|
|
+ QgsProcessingParameterDatabaseSchema,
|
|
|
+ QgsProcessingParameterString,
|
|
|
+ QgsProcessingParameterFile,
|
|
|
+ QgsProcessingParameterBoolean
|
|
|
+)
|
|
|
+from qgis.utils import iface
|
|
|
+
|
|
|
+
|
|
|
+# 设置字体以支持中文
|
|
|
+def set_matplotlib_font():
|
|
|
+ try:
|
|
|
+ font_path = 'C:/Windows/Fonts/msyh.ttc'
|
|
|
+ prop = font_manager.FontProperties(fname=font_path)
|
|
|
+ plt.rcParams['font.family'] = prop.get_name()
|
|
|
+ except Exception as e:
|
|
|
+ print(f"字体设置失败:{e}")
|
|
|
+
|
|
|
+
|
|
|
+class DataStorageStatistics(QgsProcessingAlgorithm):
|
|
|
+ DATABASE = 'DATABASE'
|
|
|
+ SCHEMA = 'SCHEMA'
|
|
|
+ TABLE = 'TABLE'
|
|
|
+ EXPORT_DIR = 'EXPORT_DIR'
|
|
|
+ EXPORT_CHARTS = 'EXPORT_CHARTS'
|
|
|
+
|
|
|
+ def initAlgorithm(self, config=None):
|
|
|
+ self.addParameter(QgsProcessingParameterProviderConnection(
|
|
|
+ self.DATABASE, '数据库连接', 'postgres', defaultValue=siwei_config.CONFIG['db']['host']))
|
|
|
+
|
|
|
+ self.addParameter(QgsProcessingParameterDatabaseSchema(
|
|
|
+ self.SCHEMA, '模式', connectionParameterName=self.DATABASE, defaultValue='base'))
|
|
|
+
|
|
|
+ self.addParameter(QgsProcessingParameterString(
|
|
|
+ self.TABLE, '表名', defaultValue='t_vector_storage'))
|
|
|
+
|
|
|
+ self.addParameter(QgsProcessingParameterFile(
|
|
|
+ self.EXPORT_DIR, '图表导出目录', behavior=QgsProcessingParameterFile.Folder, optional=True))
|
|
|
+
|
|
|
+ self.addParameter(QgsProcessingParameterBoolean(
|
|
|
+ self.EXPORT_CHARTS, '是否导出图表', defaultValue=True))
|
|
|
+
|
|
|
+ def processAlgorithm(self, parameters, context, feedback):
|
|
|
+ connection_params = {
|
|
|
+ 'host': siwei_config.CONFIG['db']['host'],
|
|
|
+ 'port': siwei_config.CONFIG['db']['port'],
|
|
|
+ 'dbname': siwei_config.CONFIG['db']['name'],
|
|
|
+ 'user': siwei_config.CONFIG['db']['user'],
|
|
|
+ 'password': siwei_config.CONFIG['db']['password'],
|
|
|
+ "connect_timeout": 10,
|
|
|
+ }
|
|
|
+
|
|
|
+ schema = self.parameterAsString(parameters, self.SCHEMA, context)
|
|
|
+ table = self.parameterAsString(parameters, self.TABLE, context)
|
|
|
+ full_table = f'"{schema}"."{table}"'
|
|
|
+ export_dir = self.parameterAsString(parameters, self.EXPORT_DIR, context) or os.path.expanduser(
|
|
|
+ '~/qgis_stat_exports')
|
|
|
+ os.makedirs(export_dir, exist_ok=True)
|
|
|
+ export_charts = self.parameterAsBoolean(parameters, self.EXPORT_CHARTS, context)
|
|
|
+
|
|
|
+ conn = psycopg2.connect(**connection_params)
|
|
|
+ conn.autocommit = True
|
|
|
+ cursor = conn.cursor()
|
|
|
+
|
|
|
+ fields = ['ywlx', 'sjlx', 'glbm']
|
|
|
+ stat_results = {}
|
|
|
+
|
|
|
+ for field in fields:
|
|
|
+ feedback.pushInfo(f"正在统计字段:{field}")
|
|
|
+ cursor.execute(f"""
|
|
|
+ SELECT {field}, COUNT(*) FROM {full_table}
|
|
|
+ WHERE {field} IS NOT NULL
|
|
|
+ GROUP BY {field}
|
|
|
+ ORDER BY COUNT(*) DESC
|
|
|
+ """)
|
|
|
+ data = cursor.fetchall()
|
|
|
+ stat_results[field] = data
|
|
|
+ for row in data:
|
|
|
+ feedback.pushInfo(f"{row[0]}: {row[1]}")
|
|
|
+
|
|
|
+ feedback.pushInfo("正在统计数据文件大小...")
|
|
|
+ size_map = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
|
|
|
+ found_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
|
|
|
+ missing_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
|
|
|
+
|
|
|
+ # 特别处理 osgb 类型的数据
|
|
|
+ feedback.pushInfo("正在处理 osgb 类型数据...")
|
|
|
+ cursor.execute(f"""
|
|
|
+ SELECT id, sjywz
|
|
|
+ FROM {full_table}
|
|
|
+ WHERE sjlx = 'osgb' AND sjywz IS NOT NULL
|
|
|
+ """)
|
|
|
+ osgb_rows = cursor.fetchall()
|
|
|
+
|
|
|
+ feedback.pushInfo(f"找到 {len(osgb_rows)} 条 osgb 数据记录")
|
|
|
+
|
|
|
+ # 为 osgb 类型计算总大小
|
|
|
+ total_osgb_size = 0
|
|
|
+ osgb_files_count = 0
|
|
|
+ missing_osgb_files = 0
|
|
|
+ osgb_file_sizes = [] # 用于统计各个文件的大小
|
|
|
+
|
|
|
+ for osgb_id, sjywz in osgb_rows:
|
|
|
+ if not sjywz:
|
|
|
+ missing_osgb_files += 1
|
|
|
+ feedback.pushWarning(f"ID为 {osgb_id} 的 osgb 记录 sjywz 字段为空")
|
|
|
+ continue
|
|
|
+
|
|
|
+ try:
|
|
|
+ # 检查文件路径是否存在
|
|
|
+ if not os.path.exists(sjywz):
|
|
|
+ missing_osgb_files += 1
|
|
|
+ feedback.pushWarning(f"找不到 osgb 文件路径: {sjywz}")
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 如果 sjywz 是文件夹,则计算文件夹总大小
|
|
|
+ if os.path.isdir(sjywz):
|
|
|
+ folder_size = 0
|
|
|
+ for dirpath, dirnames, filenames in os.walk(sjywz):
|
|
|
+ for filename in filenames:
|
|
|
+ file_path = os.path.join(dirpath, filename)
|
|
|
+ if os.path.exists(file_path):
|
|
|
+ folder_size += os.path.getsize(file_path)
|
|
|
+
|
|
|
+ file_size = folder_size
|
|
|
+ feedback.pushInfo(f"OSGB文件夹 {sjywz} 总大小: {round(file_size / 1024 / 1024, 2)} MB")
|
|
|
+ else:
|
|
|
+ # 如果 sjywz 是文件,则直接获取文件大小
|
|
|
+ file_size = os.path.getsize(sjywz)
|
|
|
+ feedback.pushInfo(f"OSGB文件 {sjywz} 大小: {round(file_size / 1024 / 1024, 2)} MB")
|
|
|
+
|
|
|
+ total_osgb_size += file_size
|
|
|
+ osgb_files_count += 1
|
|
|
+ osgb_file_sizes.append((sjywz, file_size))
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ missing_osgb_files += 1
|
|
|
+ feedback.pushWarning(f"读取 osgb 文件 {sjywz} 失败: {str(e)}")
|
|
|
+
|
|
|
+ # 更新 osgb 类型的统计信息
|
|
|
+ size_map['osgb'] = total_osgb_size
|
|
|
+ found_tables['osgb'] = osgb_files_count
|
|
|
+ missing_tables['osgb'] = missing_osgb_files
|
|
|
+
|
|
|
+ # 输出 osgb 统计结果
|
|
|
+ feedback.pushInfo(f"\nOSGB 文件统计结果:")
|
|
|
+ feedback.pushInfo(f"总文件数: {osgb_files_count}")
|
|
|
+ feedback.pushInfo(f"缺失文件数: {missing_osgb_files}")
|
|
|
+ feedback.pushInfo(f"总大小: {round(total_osgb_size / 1024 / 1024, 2)} MB")
|
|
|
+
|
|
|
+ # 输出最大的几个 osgb 文件
|
|
|
+ if osgb_file_sizes:
|
|
|
+ osgb_file_sizes.sort(key=lambda x: x[1], reverse=True)
|
|
|
+ feedback.pushInfo("\n最大的5个 OSGB 文件:")
|
|
|
+ for i, (file_path, size) in enumerate(osgb_file_sizes[:5], 1):
|
|
|
+ feedback.pushInfo(f"{i}. {file_path}: {round(size / 1024 / 1024, 2)} MB")
|
|
|
+
|
|
|
+ # 处理其他数据类型
|
|
|
+ cursor.execute(f"""
|
|
|
+ SELECT sjlx, name, sjywz
|
|
|
+ FROM {full_table}
|
|
|
+ WHERE sjlx IN ('vector', 'raster', 'table') AND sjlx != 'osgb'
|
|
|
+ """)
|
|
|
+ rows = cursor.fetchall()
|
|
|
+
|
|
|
+ for sjlx, name, sjywz in rows:
|
|
|
+ try:
|
|
|
+ if sjlx in ['vector', 'raster']:
|
|
|
+ if not name:
|
|
|
+ continue
|
|
|
+ table_name_candidates = self.get_table_name_candidates(name)
|
|
|
+ else: # sjlx == 'table'
|
|
|
+ if not sjywz:
|
|
|
+ continue
|
|
|
+ table_name_candidates = self.get_table_name_candidates(sjywz)
|
|
|
+
|
|
|
+ size_found = False
|
|
|
+ for full_target in table_name_candidates:
|
|
|
+ try:
|
|
|
+ cursor.execute("SELECT pg_total_relation_size(%s);", (full_target,))
|
|
|
+ size = cursor.fetchone()
|
|
|
+ if size and size[0]:
|
|
|
+ size_map[sjlx] += size[0]
|
|
|
+ found_tables[sjlx] += 1
|
|
|
+ size_found = True
|
|
|
+ break
|
|
|
+ except Exception:
|
|
|
+ continue
|
|
|
+
|
|
|
+ if not size_found:
|
|
|
+ missing_tables[sjlx] += 1
|
|
|
+ feedback.pushWarning(f"无法找到表:{name if sjlx in ['vector', 'raster'] else sjywz}")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ feedback.pushInfo(f"处理表时出错:{name if sjlx in ['vector', 'raster'] else sjywz},错误信息:{str(e)}")
|
|
|
+
|
|
|
+ feedback.pushInfo("\n表大小统计结果:")
|
|
|
+ data_type_names = {
|
|
|
+ 'vector': '矢量数据',
|
|
|
+ 'raster': '栅格数据',
|
|
|
+ 'table': '附件资料',
|
|
|
+ 'osgb': '三维数据'
|
|
|
+ }
|
|
|
+ for sjlx in size_map:
|
|
|
+ feedback.pushInfo(
|
|
|
+ f"{data_type_names[sjlx]}: 找到 {found_tables[sjlx]} 个表/文件,缺失 {missing_tables[sjlx]} 个,总大小 {round(size_map[sjlx] / 1024 / 1024, 2)} MB")
|
|
|
+
|
|
|
+ conn.close()
|
|
|
+
|
|
|
+ # 图表绘制
|
|
|
+ set_matplotlib_font()
|
|
|
+ timestamp = time.strftime("%Y%m%d_%H%M%S")
|
|
|
+ fig, axes = plt.subplots(2, 2, figsize=(14, 12))
|
|
|
+
|
|
|
+ field_names = {'ywlx': '业务类型', 'sjlx': '数据类型', 'glbm': '管理部门'}
|
|
|
+
|
|
|
+ for idx, field in enumerate(fields):
|
|
|
+ data = stat_results.get(field, [])
|
|
|
+ labels = [str(r[0]) for r in data]
|
|
|
+ sizes = [r[1] for r in data]
|
|
|
+
|
|
|
+ ax = axes[idx // 2][idx % 2]
|
|
|
+ if field == 'ywlx':
|
|
|
+ self.plot_donut(ax, labels, sizes, f"{field_names[field]} 分布")
|
|
|
+ elif field == 'glbm':
|
|
|
+ self.plot_line(ax, labels, sizes, f"{field_names[field]} 分布")
|
|
|
+ else:
|
|
|
+ self.plot_pie(ax, labels, sizes, f"{field_names[field]} 分布")
|
|
|
+
|
|
|
+ size_labels = ['矢量数据', '栅格数据', '附件资料', '三维数据']
|
|
|
+ original_keys = ['vector', 'raster', 'table', 'osgb']
|
|
|
+ size_values = [round(size_map[key] / 1024 / 1024, 2) for key in original_keys]
|
|
|
+ self.plot_bar(axes[1][1], size_labels, size_values, "数据类型大小 (MB)")
|
|
|
+
|
|
|
+ plt.suptitle("数据字段分布与存储大小统计", fontsize=16)
|
|
|
+ plt.tight_layout(rect=[0, 0.03, 1, 0.95])
|
|
|
+
|
|
|
+ if export_charts:
|
|
|
+ png_path = os.path.join(export_dir, f"data_storage_stats_{timestamp}.png")
|
|
|
+ plt.savefig(png_path, dpi=300)
|
|
|
+ feedback.pushInfo(f"图表已保存至:{png_path}")
|
|
|
+
|
|
|
+ self.show_matplotlib_dialog(fig)
|
|
|
+ return {'结果': '统计完成'}
|
|
|
+
|
|
|
+ def get_table_name_candidates(self, table_ref):
|
|
|
+ candidates = [table_ref]
|
|
|
+ if '.' in table_ref:
|
|
|
+ parts = table_ref.split('.')
|
|
|
+ if len(parts) == 2:
|
|
|
+ candidates.extend([
|
|
|
+ f'"{parts[0]}"."{parts[1]}"',
|
|
|
+ f'{parts[0]}."{parts[1]}"',
|
|
|
+ f'"{parts[0]}".{parts[1]}'
|
|
|
+ ])
|
|
|
+ if '_' in table_ref:
|
|
|
+ candidates.append(table_ref.split('_', 1)[1])
|
|
|
+ if table_ref.startswith('hhht_'):
|
|
|
+ candidates.append(table_ref[5:])
|
|
|
+ return list(set(candidates))
|
|
|
+
|
|
|
+ def plot_donut(self, ax, labels, sizes, title):
|
|
|
+ if not sizes or sum(sizes) == 0:
|
|
|
+ ax.axis('off')
|
|
|
+ ax.set_title(f"{title}\n(无数据)")
|
|
|
+ return
|
|
|
+ if len(labels) > 6:
|
|
|
+ others_sum = sum(sizes[5:])
|
|
|
+ labels = labels[:5] + ["其他"]
|
|
|
+ sizes = sizes[:5] + [others_sum]
|
|
|
+ wedges, texts, autotexts = ax.pie(
|
|
|
+ sizes, labels=labels, autopct='%1.1f%%', startangle=90,
|
|
|
+ wedgeprops=dict(width=0.4), textprops={'fontsize': 9})
|
|
|
+ for i, autotext in enumerate(autotexts):
|
|
|
+ autotext.set_text(f'{sizes[i]}')
|
|
|
+ ax.set_title(title)
|
|
|
+ ax.axis('equal')
|
|
|
+ ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
|
|
|
+
|
|
|
+ def plot_pie(self, ax, labels, sizes, title):
|
|
|
+ if not sizes or sum(sizes) == 0:
|
|
|
+ ax.axis('off')
|
|
|
+ ax.set_title(f"{title}\n(无数据)")
|
|
|
+ return
|
|
|
+ if len(labels) > 6:
|
|
|
+ others_sum = sum(sizes[5:])
|
|
|
+ labels = labels[:5] + ["其他"]
|
|
|
+ sizes = sizes[:5] + [others_sum]
|
|
|
+ wedges, texts, autotexts = ax.pie(
|
|
|
+ sizes, labels=labels, autopct='%1.1f%%', startangle=90,
|
|
|
+ textprops={'fontsize': 9})
|
|
|
+ for i, autotext in enumerate(autotexts):
|
|
|
+ autotext.set_text(f'{sizes[i]}')
|
|
|
+ ax.set_title(title)
|
|
|
+ ax.axis('equal')
|
|
|
+ ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
|
|
|
+
|
|
|
+ def plot_line(self, ax, labels, sizes, title):
|
|
|
+ if not sizes:
|
|
|
+ ax.axis('off')
|
|
|
+ ax.set_title(f"{title}\n(无数据)")
|
|
|
+ return
|
|
|
+ ax.plot(labels, sizes, marker='o', linestyle='-', color='teal')
|
|
|
+ ax.set_title(title)
|
|
|
+ ax.set_xlabel("管理部门")
|
|
|
+ ax.set_ylabel("数量")
|
|
|
+ for i, (x, y) in enumerate(zip(labels, sizes)):
|
|
|
+ ax.annotate(f'{y}', (x, y), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=9)
|
|
|
+ ax.tick_params(axis='x', rotation=30)
|
|
|
+
|
|
|
+ def plot_bar(self, ax, labels, sizes, title):
|
|
|
+ if not sizes or sum(sizes) == 0:
|
|
|
+ ax.axis('off')
|
|
|
+ ax.set_title(f"{title}\n(无数据)")
|
|
|
+ return
|
|
|
+ bars = ax.bar(labels, sizes, color='cornflowerblue')
|
|
|
+ ax.set_title(title)
|
|
|
+ ax.set_xlabel('数据类型')
|
|
|
+ ax.set_ylabel('大小 (MB)')
|
|
|
+ for bar in bars:
|
|
|
+ height = bar.get_height()
|
|
|
+ ax.annotate(f'{height:.2f} MB',
|
|
|
+ xy=(bar.get_x() + bar.get_width() / 2, height),
|
|
|
+ xytext=(0, 3),
|
|
|
+ textcoords="offset points",
|
|
|
+ ha='center', va='bottom', fontsize=9)
|
|
|
+
|
|
|
+ def show_matplotlib_dialog(self, fig):
|
|
|
+ dialog = QDialog(iface.mainWindow())
|
|
|
+ dialog.setWindowTitle("统计图表")
|
|
|
+ layout = QVBoxLayout()
|
|
|
+ canvas = FigureCanvas(fig)
|
|
|
+ layout.addWidget(canvas)
|
|
|
+ dialog.setLayout(layout)
|
|
|
+ dialog.resize(1200, 900)
|
|
|
+ dialog.exec_()
|
|
|
+
|
|
|
+ def name(self):
|
|
|
+ return 'DataStorageStatistics'
|
|
|
+
|
|
|
+ def displayName(self):
|
|
|
+ return '数据统计'
|
|
|
+
|
|
|
+ def group(self):
|
|
|
+ return '数据分析工具'
|
|
|
+
|
|
|
+ def groupId(self):
|
|
|
+ return 'data_storage_analysis'
|
|
|
+
|
|
|
+ def createInstance(self):
|
|
|
+ return DataStorageStatistics()
|