DataStorageStatistics.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. __author__ = 'liying'
  2. __date__ = 'May 2025'
  3. __copyright__ = '(C) 2025, liying'
  4. import os
  5. import time
  6. import psycopg2
  7. import matplotlib.pyplot as plt
  8. from matplotlib import font_manager
  9. import numpy as np
  10. import siwei_config
  11. from PyQt5.QtWidgets import QDialog, QVBoxLayout
  12. from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
  13. from qgis.core import (
  14. QgsProcessingAlgorithm,
  15. QgsProcessingParameterProviderConnection,
  16. QgsProcessingParameterDatabaseSchema,
  17. QgsProcessingParameterString,
  18. QgsProcessingParameterFile,
  19. QgsProcessingParameterBoolean
  20. )
  21. from qgis.utils import iface
  22. # 设置字体以支持中文
  23. def set_matplotlib_font():
  24. try:
  25. font_path = 'C:/Windows/Fonts/msyh.ttc'
  26. prop = font_manager.FontProperties(fname=font_path)
  27. plt.rcParams['font.family'] = prop.get_name()
  28. except Exception as e:
  29. print(f"字体设置失败:{e}")
  30. class DataStorageStatistics(QgsProcessingAlgorithm):
  31. DATABASE = 'DATABASE'
  32. SCHEMA = 'SCHEMA'
  33. TABLE = 'TABLE'
  34. EXPORT_DIR = 'EXPORT_DIR'
  35. EXPORT_CHARTS = 'EXPORT_CHARTS'
  36. def initAlgorithm(self, config=None):
  37. self.addParameter(QgsProcessingParameterProviderConnection(
  38. self.DATABASE, '数据库连接', 'postgres', defaultValue=siwei_config['db']['host']))
  39. self.addParameter(QgsProcessingParameterDatabaseSchema(
  40. self.SCHEMA, '模式', connectionParameterName=self.DATABASE, defaultValue='base'))
  41. self.addParameter(QgsProcessingParameterString(
  42. self.TABLE, '表名', defaultValue='t_vector_storage'))
  43. self.addParameter(QgsProcessingParameterFile(
  44. self.EXPORT_DIR, '图表导出目录', behavior=QgsProcessingParameterFile.Folder, optional=True))
  45. self.addParameter(QgsProcessingParameterBoolean(
  46. self.EXPORT_CHARTS, '是否导出图表', defaultValue=True))
  47. def processAlgorithm(self, parameters, context, feedback):
  48. connection_params = {
  49. 'host': siwei_config['db']['host'],
  50. 'port': siwei_config['db']['port'],
  51. 'dbname': siwei_config['db']['name'],
  52. 'user': siwei_config['db']['user'],
  53. 'password': siwei_config['db']['password'],
  54. }
  55. schema = self.parameterAsString(parameters, self.SCHEMA, context)
  56. table = self.parameterAsString(parameters, self.TABLE, context)
  57. full_table = f'"{schema}"."{table}"'
  58. export_dir = self.parameterAsString(parameters, self.EXPORT_DIR, context) or os.path.expanduser(
  59. '~/qgis_stat_exports')
  60. os.makedirs(export_dir, exist_ok=True)
  61. export_charts = self.parameterAsBoolean(parameters, self.EXPORT_CHARTS, context)
  62. conn = psycopg2.connect(**connection_params)
  63. conn.autocommit = True
  64. cursor = conn.cursor()
  65. fields = ['ywlx', 'sjlx', 'glbm']
  66. stat_results = {}
  67. for field in fields:
  68. feedback.pushInfo(f"正在统计字段:{field}")
  69. cursor.execute(f"""
  70. SELECT {field}, COUNT(*) FROM {full_table}
  71. WHERE {field} IS NOT NULL
  72. GROUP BY {field}
  73. ORDER BY COUNT(*) DESC
  74. """)
  75. data = cursor.fetchall()
  76. stat_results[field] = data
  77. for row in data:
  78. feedback.pushInfo(f"{row[0]}: {row[1]}")
  79. feedback.pushInfo("正在统计数据文件大小...")
  80. size_map = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  81. found_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  82. missing_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  83. # 特别处理 osgb 类型的数据
  84. feedback.pushInfo("正在处理 osgb 类型数据...")
  85. cursor.execute(f"""
  86. SELECT id, sjywz
  87. FROM {full_table}
  88. WHERE sjlx = 'osgb' AND sjywz IS NOT NULL
  89. """)
  90. osgb_rows = cursor.fetchall()
  91. feedback.pushInfo(f"找到 {len(osgb_rows)} 条 osgb 数据记录")
  92. # 为 osgb 类型计算总大小
  93. total_osgb_size = 0
  94. osgb_files_count = 0
  95. missing_osgb_files = 0
  96. osgb_file_sizes = [] # 用于统计各个文件的大小
  97. for osgb_id, sjywz in osgb_rows:
  98. if not sjywz:
  99. missing_osgb_files += 1
  100. feedback.pushWarning(f"ID为 {osgb_id} 的 osgb 记录 sjywz 字段为空")
  101. continue
  102. try:
  103. # 检查文件路径是否存在
  104. if not os.path.exists(sjywz):
  105. missing_osgb_files += 1
  106. feedback.pushWarning(f"找不到 osgb 文件路径: {sjywz}")
  107. continue
  108. # 如果 sjywz 是文件夹,则计算文件夹总大小
  109. if os.path.isdir(sjywz):
  110. folder_size = 0
  111. for dirpath, dirnames, filenames in os.walk(sjywz):
  112. for filename in filenames:
  113. file_path = os.path.join(dirpath, filename)
  114. if os.path.exists(file_path):
  115. folder_size += os.path.getsize(file_path)
  116. file_size = folder_size
  117. feedback.pushInfo(f"OSGB文件夹 {sjywz} 总大小: {round(file_size / 1024 / 1024, 2)} MB")
  118. else:
  119. # 如果 sjywz 是文件,则直接获取文件大小
  120. file_size = os.path.getsize(sjywz)
  121. feedback.pushInfo(f"OSGB文件 {sjywz} 大小: {round(file_size / 1024 / 1024, 2)} MB")
  122. total_osgb_size += file_size
  123. osgb_files_count += 1
  124. osgb_file_sizes.append((sjywz, file_size))
  125. except Exception as e:
  126. missing_osgb_files += 1
  127. feedback.pushWarning(f"读取 osgb 文件 {sjywz} 失败: {str(e)}")
  128. # 更新 osgb 类型的统计信息
  129. size_map['osgb'] = total_osgb_size
  130. found_tables['osgb'] = osgb_files_count
  131. missing_tables['osgb'] = missing_osgb_files
  132. # 输出 osgb 统计结果
  133. feedback.pushInfo(f"\nOSGB 文件统计结果:")
  134. feedback.pushInfo(f"总文件数: {osgb_files_count}")
  135. feedback.pushInfo(f"缺失文件数: {missing_osgb_files}")
  136. feedback.pushInfo(f"总大小: {round(total_osgb_size / 1024 / 1024, 2)} MB")
  137. # 输出最大的几个 osgb 文件
  138. if osgb_file_sizes:
  139. osgb_file_sizes.sort(key=lambda x: x[1], reverse=True)
  140. feedback.pushInfo("\n最大的5个 OSGB 文件:")
  141. for i, (file_path, size) in enumerate(osgb_file_sizes[:5], 1):
  142. feedback.pushInfo(f"{i}. {file_path}: {round(size / 1024 / 1024, 2)} MB")
  143. # 处理其他数据类型
  144. cursor.execute(f"""
  145. SELECT sjlx, name, sjywz
  146. FROM {full_table}
  147. WHERE sjlx IN ('vector', 'raster', 'table') AND sjlx != 'osgb'
  148. """)
  149. rows = cursor.fetchall()
  150. for sjlx, name, sjywz in rows:
  151. try:
  152. if sjlx in ['vector', 'raster']:
  153. if not name:
  154. continue
  155. table_name_candidates = self.get_table_name_candidates(name)
  156. else: # sjlx == 'table'
  157. if not sjywz:
  158. continue
  159. table_name_candidates = self.get_table_name_candidates(sjywz)
  160. size_found = False
  161. for full_target in table_name_candidates:
  162. try:
  163. cursor.execute("SELECT pg_total_relation_size(%s);", (full_target,))
  164. size = cursor.fetchone()
  165. if size and size[0]:
  166. size_map[sjlx] += size[0]
  167. found_tables[sjlx] += 1
  168. size_found = True
  169. break
  170. except Exception:
  171. continue
  172. if not size_found:
  173. missing_tables[sjlx] += 1
  174. feedback.pushWarning(f"无法找到表:{name if sjlx in ['vector', 'raster'] else sjywz}")
  175. except Exception as e:
  176. feedback.pushInfo(f"处理表时出错:{name if sjlx in ['vector', 'raster'] else sjywz},错误信息:{str(e)}")
  177. feedback.pushInfo("\n表大小统计结果:")
  178. data_type_names = {
  179. 'vector': '矢量数据',
  180. 'raster': '栅格数据',
  181. 'table': '附件资料',
  182. 'osgb': '三维数据'
  183. }
  184. for sjlx in size_map:
  185. feedback.pushInfo(
  186. f"{data_type_names[sjlx]}: 找到 {found_tables[sjlx]} 个表/文件,缺失 {missing_tables[sjlx]} 个,总大小 {round(size_map[sjlx] / 1024 / 1024, 2)} MB")
  187. conn.close()
  188. # 图表绘制
  189. set_matplotlib_font()
  190. timestamp = time.strftime("%Y%m%d_%H%M%S")
  191. fig, axes = plt.subplots(2, 2, figsize=(14, 12))
  192. field_names = {'ywlx': '业务类型', 'sjlx': '数据类型', 'glbm': '管理部门'}
  193. for idx, field in enumerate(fields):
  194. data = stat_results.get(field, [])
  195. labels = [str(r[0]) for r in data]
  196. sizes = [r[1] for r in data]
  197. ax = axes[idx // 2][idx % 2]
  198. if field == 'ywlx':
  199. self.plot_donut(ax, labels, sizes, f"{field_names[field]} 分布")
  200. elif field == 'glbm':
  201. self.plot_line(ax, labels, sizes, f"{field_names[field]} 分布")
  202. else:
  203. self.plot_pie(ax, labels, sizes, f"{field_names[field]} 分布")
  204. size_labels = ['矢量数据', '栅格数据', '附件资料', '三维数据']
  205. original_keys = ['vector', 'raster', 'table', 'osgb']
  206. size_values = [round(size_map[key] / 1024 / 1024, 2) for key in original_keys]
  207. self.plot_bar(axes[1][1], size_labels, size_values, "数据类型大小 (MB)")
  208. plt.suptitle("数据字段分布与存储大小统计", fontsize=16)
  209. plt.tight_layout(rect=[0, 0.03, 1, 0.95])
  210. if export_charts:
  211. png_path = os.path.join(export_dir, f"data_storage_stats_{timestamp}.png")
  212. plt.savefig(png_path, dpi=300)
  213. feedback.pushInfo(f"图表已保存至:{png_path}")
  214. self.show_matplotlib_dialog(fig)
  215. return {'结果': '统计完成'}
  216. def get_table_name_candidates(self, table_ref):
  217. candidates = [table_ref]
  218. if '.' in table_ref:
  219. parts = table_ref.split('.')
  220. if len(parts) == 2:
  221. candidates.extend([
  222. f'"{parts[0]}"."{parts[1]}"',
  223. f'{parts[0]}."{parts[1]}"',
  224. f'"{parts[0]}".{parts[1]}'
  225. ])
  226. if '_' in table_ref:
  227. candidates.append(table_ref.split('_', 1)[1])
  228. if table_ref.startswith('hhht_'):
  229. candidates.append(table_ref[5:])
  230. return list(set(candidates))
  231. def plot_donut(self, ax, labels, sizes, title):
  232. if not sizes or sum(sizes) == 0:
  233. ax.axis('off')
  234. ax.set_title(f"{title}\n(无数据)")
  235. return
  236. if len(labels) > 6:
  237. others_sum = sum(sizes[5:])
  238. labels = labels[:5] + ["其他"]
  239. sizes = sizes[:5] + [others_sum]
  240. wedges, texts, autotexts = ax.pie(
  241. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  242. wedgeprops=dict(width=0.4), textprops={'fontsize': 9})
  243. for i, autotext in enumerate(autotexts):
  244. autotext.set_text(f'{sizes[i]}')
  245. ax.set_title(title)
  246. ax.axis('equal')
  247. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  248. def plot_pie(self, ax, labels, sizes, title):
  249. if not sizes or sum(sizes) == 0:
  250. ax.axis('off')
  251. ax.set_title(f"{title}\n(无数据)")
  252. return
  253. if len(labels) > 6:
  254. others_sum = sum(sizes[5:])
  255. labels = labels[:5] + ["其他"]
  256. sizes = sizes[:5] + [others_sum]
  257. wedges, texts, autotexts = ax.pie(
  258. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  259. textprops={'fontsize': 9})
  260. for i, autotext in enumerate(autotexts):
  261. autotext.set_text(f'{sizes[i]}')
  262. ax.set_title(title)
  263. ax.axis('equal')
  264. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  265. def plot_line(self, ax, labels, sizes, title):
  266. if not sizes:
  267. ax.axis('off')
  268. ax.set_title(f"{title}\n(无数据)")
  269. return
  270. ax.plot(labels, sizes, marker='o', linestyle='-', color='teal')
  271. ax.set_title(title)
  272. ax.set_xlabel("管理部门")
  273. ax.set_ylabel("数量")
  274. for i, (x, y) in enumerate(zip(labels, sizes)):
  275. ax.annotate(f'{y}', (x, y), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=9)
  276. ax.tick_params(axis='x', rotation=30)
  277. def plot_bar(self, ax, labels, sizes, title):
  278. if not sizes or sum(sizes) == 0:
  279. ax.axis('off')
  280. ax.set_title(f"{title}\n(无数据)")
  281. return
  282. bars = ax.bar(labels, sizes, color='cornflowerblue')
  283. ax.set_title(title)
  284. ax.set_xlabel('数据类型')
  285. ax.set_ylabel('大小 (MB)')
  286. for bar in bars:
  287. height = bar.get_height()
  288. ax.annotate(f'{height:.2f} MB',
  289. xy=(bar.get_x() + bar.get_width() / 2, height),
  290. xytext=(0, 3),
  291. textcoords="offset points",
  292. ha='center', va='bottom', fontsize=9)
  293. def show_matplotlib_dialog(self, fig):
  294. dialog = QDialog(iface.mainWindow())
  295. dialog.setWindowTitle("统计图表")
  296. layout = QVBoxLayout()
  297. canvas = FigureCanvas(fig)
  298. layout.addWidget(canvas)
  299. dialog.setLayout(layout)
  300. dialog.resize(1200, 900)
  301. dialog.exec_()
  302. def name(self):
  303. return 'DataStorageStatistics'
  304. def displayName(self):
  305. return '数据统计'
  306. def group(self):
  307. return '数据分析工具'
  308. def groupId(self):
  309. return 'data_storage_analysis'
  310. def createInstance(self):
  311. return DataStorageStatistics()