DataStorageStatistics.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368
  1. __author__ = 'liying'
  2. __date__ = 'May 2025'
  3. __copyright__ = '(C) 2025, liying'
  4. import os
  5. import time
  6. import psycopg2
  7. import matplotlib.pyplot as plt
  8. from matplotlib import font_manager
  9. import numpy as np
  10. import siwei_config
  11. from PyQt5.QtWidgets import QDialog, QVBoxLayout
  12. from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
  13. from qgis.core import (
  14. QgsProcessingAlgorithm,
  15. QgsProcessingParameterProviderConnection,
  16. QgsProcessingParameterDatabaseSchema,
  17. QgsProcessingParameterString,
  18. QgsProcessingParameterFile,
  19. QgsProcessingParameterBoolean
  20. )
  21. from qgis.utils import iface
  22. # 设置字体以支持中文
  23. def set_matplotlib_font():
  24. try:
  25. font_path = 'C:/Windows/Fonts/msyh.ttc'
  26. prop = font_manager.FontProperties(fname=font_path)
  27. plt.rcParams['font.family'] = prop.get_name()
  28. except Exception as e:
  29. print(f"字体设置失败:{e}")
  30. class DataStorageStatistics(QgsProcessingAlgorithm):
  31. DATABASE = 'DATABASE'
  32. SCHEMA = 'SCHEMA'
  33. TABLE = 'TABLE'
  34. EXPORT_DIR = 'EXPORT_DIR'
  35. EXPORT_CHARTS = 'EXPORT_CHARTS'
  36. def initAlgorithm(self, config=None):
  37. self.addParameter(QgsProcessingParameterProviderConnection(
  38. self.DATABASE, '数据库连接', 'postgres', defaultValue=siwei_config.CONFIG['db']['host']))
  39. self.addParameter(QgsProcessingParameterDatabaseSchema(
  40. self.SCHEMA, '模式', connectionParameterName=self.DATABASE, defaultValue='base'))
  41. self.addParameter(QgsProcessingParameterString(
  42. self.TABLE, '表名', defaultValue='t_vector_storage'))
  43. self.addParameter(QgsProcessingParameterFile(
  44. self.EXPORT_DIR, '图表导出目录', behavior=QgsProcessingParameterFile.Folder, optional=True))
  45. self.addParameter(QgsProcessingParameterBoolean(
  46. self.EXPORT_CHARTS, '是否导出图表', defaultValue=True))
  47. def processAlgorithm(self, parameters, context, feedback):
  48. connection_params = {
  49. 'host': siwei_config.CONFIG['db']['host'],
  50. 'port': siwei_config.CONFIG['db']['port'],
  51. 'dbname': siwei_config.CONFIG['db']['name'],
  52. 'user': siwei_config.CONFIG['db']['user'],
  53. 'password': siwei_config.CONFIG['db']['password'],
  54. "connect_timeout": 10,
  55. }
  56. schema = self.parameterAsString(parameters, self.SCHEMA, context)
  57. table = self.parameterAsString(parameters, self.TABLE, context)
  58. full_table = f'"{schema}"."{table}"'
  59. export_dir = self.parameterAsString(parameters, self.EXPORT_DIR, context) or os.path.expanduser(
  60. '~/qgis_stat_exports')
  61. os.makedirs(export_dir, exist_ok=True)
  62. export_charts = self.parameterAsBoolean(parameters, self.EXPORT_CHARTS, context)
  63. conn = psycopg2.connect(**connection_params)
  64. conn.autocommit = True
  65. cursor = conn.cursor()
  66. fields = ['ywlx', 'sjlx', 'glbm']
  67. stat_results = {}
  68. for field in fields:
  69. feedback.pushInfo(f"正在统计字段:{field}")
  70. cursor.execute(f"""
  71. SELECT {field}, COUNT(*) FROM {full_table}
  72. WHERE {field} IS NOT NULL
  73. GROUP BY {field}
  74. ORDER BY COUNT(*) DESC
  75. """)
  76. data = cursor.fetchall()
  77. stat_results[field] = data
  78. for row in data:
  79. feedback.pushInfo(f"{row[0]}: {row[1]}")
  80. feedback.pushInfo("正在统计数据文件大小...")
  81. size_map = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  82. found_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  83. missing_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  84. # 特别处理 osgb 类型的数据
  85. feedback.pushInfo("正在处理 osgb 类型数据...")
  86. cursor.execute(f"""
  87. SELECT id, sjywz
  88. FROM {full_table}
  89. WHERE sjlx = 'osgb' AND sjywz IS NOT NULL
  90. """)
  91. osgb_rows = cursor.fetchall()
  92. feedback.pushInfo(f"找到 {len(osgb_rows)} 条 osgb 数据记录")
  93. # 为 osgb 类型计算总大小
  94. total_osgb_size = 0
  95. osgb_files_count = 0
  96. missing_osgb_files = 0
  97. osgb_file_sizes = [] # 用于统计各个文件的大小
  98. for osgb_id, sjywz in osgb_rows:
  99. if not sjywz:
  100. missing_osgb_files += 1
  101. feedback.pushWarning(f"ID为 {osgb_id} 的 osgb 记录 sjywz 字段为空")
  102. continue
  103. try:
  104. # 检查文件路径是否存在
  105. if not os.path.exists(sjywz):
  106. missing_osgb_files += 1
  107. feedback.pushWarning(f"找不到 osgb 文件路径: {sjywz}")
  108. continue
  109. # 如果 sjywz 是文件夹,则计算文件夹总大小
  110. if os.path.isdir(sjywz):
  111. folder_size = 0
  112. for dirpath, dirnames, filenames in os.walk(sjywz):
  113. for filename in filenames:
  114. file_path = os.path.join(dirpath, filename)
  115. if os.path.exists(file_path):
  116. folder_size += os.path.getsize(file_path)
  117. file_size = folder_size
  118. feedback.pushInfo(f"OSGB文件夹 {sjywz} 总大小: {round(file_size / 1024 / 1024, 2)} MB")
  119. else:
  120. # 如果 sjywz 是文件,则直接获取文件大小
  121. file_size = os.path.getsize(sjywz)
  122. feedback.pushInfo(f"OSGB文件 {sjywz} 大小: {round(file_size / 1024 / 1024, 2)} MB")
  123. total_osgb_size += file_size
  124. osgb_files_count += 1
  125. osgb_file_sizes.append((sjywz, file_size))
  126. except Exception as e:
  127. missing_osgb_files += 1
  128. feedback.pushWarning(f"读取 osgb 文件 {sjywz} 失败: {str(e)}")
  129. # 更新 osgb 类型的统计信息
  130. size_map['osgb'] = total_osgb_size
  131. found_tables['osgb'] = osgb_files_count
  132. missing_tables['osgb'] = missing_osgb_files
  133. # 输出 osgb 统计结果
  134. feedback.pushInfo(f"\nOSGB 文件统计结果:")
  135. feedback.pushInfo(f"总文件数: {osgb_files_count}")
  136. feedback.pushInfo(f"缺失文件数: {missing_osgb_files}")
  137. feedback.pushInfo(f"总大小: {round(total_osgb_size / 1024 / 1024, 2)} MB")
  138. # 输出最大的几个 osgb 文件
  139. if osgb_file_sizes:
  140. osgb_file_sizes.sort(key=lambda x: x[1], reverse=True)
  141. feedback.pushInfo("\n最大的5个 OSGB 文件:")
  142. for i, (file_path, size) in enumerate(osgb_file_sizes[:5], 1):
  143. feedback.pushInfo(f"{i}. {file_path}: {round(size / 1024 / 1024, 2)} MB")
  144. # 处理其他数据类型
  145. cursor.execute(f"""
  146. SELECT sjlx, name, sjywz
  147. FROM {full_table}
  148. WHERE sjlx IN ('vector', 'raster', 'table') AND sjlx != 'osgb'
  149. """)
  150. rows = cursor.fetchall()
  151. for sjlx, name, sjywz in rows:
  152. try:
  153. if sjlx in ['vector', 'raster']:
  154. if not name:
  155. continue
  156. table_name_candidates = self.get_table_name_candidates(name)
  157. else: # sjlx == 'table'
  158. if not sjywz:
  159. continue
  160. table_name_candidates = self.get_table_name_candidates(sjywz)
  161. size_found = False
  162. for full_target in table_name_candidates:
  163. try:
  164. cursor.execute("SELECT pg_total_relation_size(%s);", (full_target,))
  165. size = cursor.fetchone()
  166. if size and size[0]:
  167. size_map[sjlx] += size[0]
  168. found_tables[sjlx] += 1
  169. size_found = True
  170. break
  171. except Exception:
  172. continue
  173. if not size_found:
  174. missing_tables[sjlx] += 1
  175. feedback.pushWarning(f"无法找到表:{name if sjlx in ['vector', 'raster'] else sjywz}")
  176. except Exception as e:
  177. feedback.pushInfo(f"处理表时出错:{name if sjlx in ['vector', 'raster'] else sjywz},错误信息:{str(e)}")
  178. feedback.pushInfo("\n表大小统计结果:")
  179. data_type_names = {
  180. 'vector': '矢量数据',
  181. 'raster': '栅格数据',
  182. 'table': '附件资料',
  183. 'osgb': '三维数据'
  184. }
  185. for sjlx in size_map:
  186. feedback.pushInfo(
  187. f"{data_type_names[sjlx]}: 找到 {found_tables[sjlx]} 个表/文件,缺失 {missing_tables[sjlx]} 个,总大小 {round(size_map[sjlx] / 1024 / 1024, 2)} MB")
  188. conn.close()
  189. # 图表绘制
  190. set_matplotlib_font()
  191. timestamp = time.strftime("%Y%m%d_%H%M%S")
  192. fig, axes = plt.subplots(2, 2, figsize=(14, 12))
  193. field_names = {'ywlx': '业务类型', 'sjlx': '数据类型', 'glbm': '管理部门'}
  194. for idx, field in enumerate(fields):
  195. data = stat_results.get(field, [])
  196. labels = [str(r[0]) for r in data]
  197. sizes = [r[1] for r in data]
  198. ax = axes[idx // 2][idx % 2]
  199. if field == 'ywlx':
  200. self.plot_donut(ax, labels, sizes, f"{field_names[field]} 分布")
  201. elif field == 'glbm':
  202. self.plot_line(ax, labels, sizes, f"{field_names[field]} 分布")
  203. else:
  204. self.plot_pie(ax, labels, sizes, f"{field_names[field]} 分布")
  205. size_labels = ['矢量数据', '栅格数据', '附件资料', '三维数据']
  206. original_keys = ['vector', 'raster', 'table', 'osgb']
  207. size_values = [round(size_map[key] / 1024 / 1024, 2) for key in original_keys]
  208. self.plot_bar(axes[1][1], size_labels, size_values, "数据类型大小 (MB)")
  209. plt.suptitle("数据字段分布与存储大小统计", fontsize=16)
  210. plt.tight_layout(rect=[0, 0.03, 1, 0.95])
  211. if export_charts:
  212. png_path = os.path.join(export_dir, f"data_storage_stats_{timestamp}.png")
  213. plt.savefig(png_path, dpi=300)
  214. feedback.pushInfo(f"图表已保存至:{png_path}")
  215. self.show_matplotlib_dialog(fig)
  216. return {'结果': '统计完成'}
  217. def get_table_name_candidates(self, table_ref):
  218. candidates = [table_ref]
  219. if '.' in table_ref:
  220. parts = table_ref.split('.')
  221. if len(parts) == 2:
  222. candidates.extend([
  223. f'"{parts[0]}"."{parts[1]}"',
  224. f'{parts[0]}."{parts[1]}"',
  225. f'"{parts[0]}".{parts[1]}'
  226. ])
  227. if '_' in table_ref:
  228. candidates.append(table_ref.split('_', 1)[1])
  229. if table_ref.startswith('hhht_'):
  230. candidates.append(table_ref[5:])
  231. return list(set(candidates))
  232. def plot_donut(self, ax, labels, sizes, title):
  233. if not sizes or sum(sizes) == 0:
  234. ax.axis('off')
  235. ax.set_title(f"{title}\n(无数据)")
  236. return
  237. if len(labels) > 6:
  238. others_sum = sum(sizes[5:])
  239. labels = labels[:5] + ["其他"]
  240. sizes = sizes[:5] + [others_sum]
  241. wedges, texts, autotexts = ax.pie(
  242. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  243. wedgeprops=dict(width=0.4), textprops={'fontsize': 9})
  244. for i, autotext in enumerate(autotexts):
  245. autotext.set_text(f'{sizes[i]}')
  246. ax.set_title(title)
  247. ax.axis('equal')
  248. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  249. def plot_pie(self, ax, labels, sizes, title):
  250. if not sizes or sum(sizes) == 0:
  251. ax.axis('off')
  252. ax.set_title(f"{title}\n(无数据)")
  253. return
  254. if len(labels) > 6:
  255. others_sum = sum(sizes[5:])
  256. labels = labels[:5] + ["其他"]
  257. sizes = sizes[:5] + [others_sum]
  258. wedges, texts, autotexts = ax.pie(
  259. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  260. textprops={'fontsize': 9})
  261. for i, autotext in enumerate(autotexts):
  262. autotext.set_text(f'{sizes[i]}')
  263. ax.set_title(title)
  264. ax.axis('equal')
  265. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  266. def plot_line(self, ax, labels, sizes, title):
  267. if not sizes:
  268. ax.axis('off')
  269. ax.set_title(f"{title}\n(无数据)")
  270. return
  271. ax.plot(labels, sizes, marker='o', linestyle='-', color='teal')
  272. ax.set_title(title)
  273. ax.set_xlabel("管理部门")
  274. ax.set_ylabel("数量")
  275. for i, (x, y) in enumerate(zip(labels, sizes)):
  276. ax.annotate(f'{y}', (x, y), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=9)
  277. ax.tick_params(axis='x', rotation=30)
  278. def plot_bar(self, ax, labels, sizes, title):
  279. if not sizes or sum(sizes) == 0:
  280. ax.axis('off')
  281. ax.set_title(f"{title}\n(无数据)")
  282. return
  283. bars = ax.bar(labels, sizes, color='cornflowerblue')
  284. ax.set_title(title)
  285. ax.set_xlabel('数据类型')
  286. ax.set_ylabel('大小 (MB)')
  287. for bar in bars:
  288. height = bar.get_height()
  289. ax.annotate(f'{height:.2f} MB',
  290. xy=(bar.get_x() + bar.get_width() / 2, height),
  291. xytext=(0, 3),
  292. textcoords="offset points",
  293. ha='center', va='bottom', fontsize=9)
  294. def show_matplotlib_dialog(self, fig):
  295. dialog = QDialog(iface.mainWindow())
  296. dialog.setWindowTitle("统计图表")
  297. layout = QVBoxLayout()
  298. canvas = FigureCanvas(fig)
  299. layout.addWidget(canvas)
  300. dialog.setLayout(layout)
  301. dialog.resize(1200, 900)
  302. dialog.exec_()
  303. def name(self):
  304. return 'DataStorageStatistics'
  305. def displayName(self):
  306. return '数据统计'
  307. def group(self):
  308. return '数据分析工具'
  309. def groupId(self):
  310. return 'data_storage_analysis'
  311. def createInstance(self):
  312. return DataStorageStatistics()