DataStorageStatistics.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. __author__ = 'liying'
  2. __date__ = 'May 2025'
  3. __copyright__ = '(C) 2025, liying'
  4. import os
  5. import time
  6. import psycopg2
  7. import matplotlib.pyplot as plt
  8. from matplotlib import font_manager
  9. import numpy as np
  10. from PyQt5.QtWidgets import QDialog, QVBoxLayout
  11. from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
  12. from qgis.core import (
  13. QgsProcessingAlgorithm,
  14. QgsProcessingParameterProviderConnection,
  15. QgsProcessingParameterDatabaseSchema,
  16. QgsProcessingParameterString,
  17. QgsProcessingParameterFile,
  18. QgsProcessingParameterBoolean
  19. )
  20. from qgis.utils import iface
  21. # 设置字体以支持中文
  22. def set_matplotlib_font():
  23. try:
  24. font_path = 'C:/Windows/Fonts/msyh.ttc'
  25. prop = font_manager.FontProperties(fname=font_path)
  26. plt.rcParams['font.family'] = prop.get_name()
  27. except Exception as e:
  28. print(f"字体设置失败:{e}")
  29. class DataStorageStatistics(QgsProcessingAlgorithm):
  30. DATABASE = 'DATABASE'
  31. SCHEMA = 'SCHEMA'
  32. TABLE = 'TABLE'
  33. EXPORT_DIR = 'EXPORT_DIR'
  34. EXPORT_CHARTS = 'EXPORT_CHARTS'
  35. def initAlgorithm(self, config=None):
  36. self.addParameter(QgsProcessingParameterProviderConnection(
  37. self.DATABASE, '数据库连接', 'postgres', defaultValue='192.168.60.2'))
  38. self.addParameter(QgsProcessingParameterDatabaseSchema(
  39. self.SCHEMA, '模式', connectionParameterName=self.DATABASE, defaultValue='base'))
  40. self.addParameter(QgsProcessingParameterString(
  41. self.TABLE, '表名', defaultValue='t_vector_storage'))
  42. self.addParameter(QgsProcessingParameterFile(
  43. self.EXPORT_DIR, '图表导出目录', behavior=QgsProcessingParameterFile.Folder, optional=True))
  44. self.addParameter(QgsProcessingParameterBoolean(
  45. self.EXPORT_CHARTS, '是否导出图表', defaultValue=True))
  46. def processAlgorithm(self, parameters, context, feedback):
  47. connection_params = {
  48. 'host': '192.168.60.2',
  49. 'port': '5432',
  50. 'dbname': 'real3d',
  51. 'user': 'postgres',
  52. 'password': 'postgis'
  53. }
  54. schema = self.parameterAsString(parameters, self.SCHEMA, context)
  55. table = self.parameterAsString(parameters, self.TABLE, context)
  56. full_table = f'"{schema}"."{table}"'
  57. export_dir = self.parameterAsString(parameters, self.EXPORT_DIR, context) or os.path.expanduser(
  58. '~/qgis_stat_exports')
  59. os.makedirs(export_dir, exist_ok=True)
  60. export_charts = self.parameterAsBoolean(parameters, self.EXPORT_CHARTS, context)
  61. conn = psycopg2.connect(**connection_params)
  62. conn.autocommit = True
  63. cursor = conn.cursor()
  64. fields = ['ywlx', 'sjlx', 'glbm']
  65. stat_results = {}
  66. for field in fields:
  67. feedback.pushInfo(f"正在统计字段:{field}")
  68. cursor.execute(f"""
  69. SELECT {field}, COUNT(*) FROM {full_table}
  70. WHERE {field} IS NOT NULL
  71. GROUP BY {field}
  72. ORDER BY COUNT(*) DESC
  73. """)
  74. data = cursor.fetchall()
  75. stat_results[field] = data
  76. for row in data:
  77. feedback.pushInfo(f"{row[0]}: {row[1]}")
  78. feedback.pushInfo("正在统计数据文件大小...")
  79. size_map = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  80. found_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  81. missing_tables = {'vector': 0, 'raster': 0, 'table': 0, 'osgb': 0}
  82. # 特别处理 osgb 类型的数据
  83. feedback.pushInfo("正在处理 osgb 类型数据...")
  84. cursor.execute(f"""
  85. SELECT id, sjywz
  86. FROM {full_table}
  87. WHERE sjlx = 'osgb' AND sjywz IS NOT NULL
  88. """)
  89. osgb_rows = cursor.fetchall()
  90. feedback.pushInfo(f"找到 {len(osgb_rows)} 条 osgb 数据记录")
  91. # 为 osgb 类型计算总大小
  92. total_osgb_size = 0
  93. osgb_files_count = 0
  94. missing_osgb_files = 0
  95. osgb_file_sizes = [] # 用于统计各个文件的大小
  96. for osgb_id, sjywz in osgb_rows:
  97. if not sjywz:
  98. missing_osgb_files += 1
  99. feedback.pushWarning(f"ID为 {osgb_id} 的 osgb 记录 sjywz 字段为空")
  100. continue
  101. try:
  102. # 检查文件路径是否存在
  103. if not os.path.exists(sjywz):
  104. missing_osgb_files += 1
  105. feedback.pushWarning(f"找不到 osgb 文件路径: {sjywz}")
  106. continue
  107. # 如果 sjywz 是文件夹,则计算文件夹总大小
  108. if os.path.isdir(sjywz):
  109. folder_size = 0
  110. for dirpath, dirnames, filenames in os.walk(sjywz):
  111. for filename in filenames:
  112. file_path = os.path.join(dirpath, filename)
  113. if os.path.exists(file_path):
  114. folder_size += os.path.getsize(file_path)
  115. file_size = folder_size
  116. feedback.pushInfo(f"OSGB文件夹 {sjywz} 总大小: {round(file_size / 1024 / 1024, 2)} MB")
  117. else:
  118. # 如果 sjywz 是文件,则直接获取文件大小
  119. file_size = os.path.getsize(sjywz)
  120. feedback.pushInfo(f"OSGB文件 {sjywz} 大小: {round(file_size / 1024 / 1024, 2)} MB")
  121. total_osgb_size += file_size
  122. osgb_files_count += 1
  123. osgb_file_sizes.append((sjywz, file_size))
  124. except Exception as e:
  125. missing_osgb_files += 1
  126. feedback.pushWarning(f"读取 osgb 文件 {sjywz} 失败: {str(e)}")
  127. # 更新 osgb 类型的统计信息
  128. size_map['osgb'] = total_osgb_size
  129. found_tables['osgb'] = osgb_files_count
  130. missing_tables['osgb'] = missing_osgb_files
  131. # 输出 osgb 统计结果
  132. feedback.pushInfo(f"\nOSGB 文件统计结果:")
  133. feedback.pushInfo(f"总文件数: {osgb_files_count}")
  134. feedback.pushInfo(f"缺失文件数: {missing_osgb_files}")
  135. feedback.pushInfo(f"总大小: {round(total_osgb_size / 1024 / 1024, 2)} MB")
  136. # 输出最大的几个 osgb 文件
  137. if osgb_file_sizes:
  138. osgb_file_sizes.sort(key=lambda x: x[1], reverse=True)
  139. feedback.pushInfo("\n最大的5个 OSGB 文件:")
  140. for i, (file_path, size) in enumerate(osgb_file_sizes[:5], 1):
  141. feedback.pushInfo(f"{i}. {file_path}: {round(size / 1024 / 1024, 2)} MB")
  142. # 处理其他数据类型
  143. cursor.execute(f"""
  144. SELECT sjlx, name, sjywz
  145. FROM {full_table}
  146. WHERE sjlx IN ('vector', 'raster', 'table') AND sjlx != 'osgb'
  147. """)
  148. rows = cursor.fetchall()
  149. for sjlx, name, sjywz in rows:
  150. try:
  151. if sjlx in ['vector', 'raster']:
  152. if not name:
  153. continue
  154. table_name_candidates = self.get_table_name_candidates(name)
  155. else: # sjlx == 'table'
  156. if not sjywz:
  157. continue
  158. table_name_candidates = self.get_table_name_candidates(sjywz)
  159. size_found = False
  160. for full_target in table_name_candidates:
  161. try:
  162. cursor.execute("SELECT pg_total_relation_size(%s);", (full_target,))
  163. size = cursor.fetchone()
  164. if size and size[0]:
  165. size_map[sjlx] += size[0]
  166. found_tables[sjlx] += 1
  167. size_found = True
  168. break
  169. except Exception:
  170. continue
  171. if not size_found:
  172. missing_tables[sjlx] += 1
  173. feedback.pushWarning(f"无法找到表:{name if sjlx in ['vector', 'raster'] else sjywz}")
  174. except Exception as e:
  175. feedback.pushInfo(f"处理表时出错:{name if sjlx in ['vector', 'raster'] else sjywz},错误信息:{str(e)}")
  176. feedback.pushInfo("\n表大小统计结果:")
  177. data_type_names = {
  178. 'vector': '矢量数据',
  179. 'raster': '栅格数据',
  180. 'table': '附件资料',
  181. 'osgb': '三维数据'
  182. }
  183. for sjlx in size_map:
  184. feedback.pushInfo(
  185. f"{data_type_names[sjlx]}: 找到 {found_tables[sjlx]} 个表/文件,缺失 {missing_tables[sjlx]} 个,总大小 {round(size_map[sjlx] / 1024 / 1024, 2)} MB")
  186. conn.close()
  187. # 图表绘制
  188. set_matplotlib_font()
  189. timestamp = time.strftime("%Y%m%d_%H%M%S")
  190. fig, axes = plt.subplots(2, 2, figsize=(14, 12))
  191. field_names = {'ywlx': '业务类型', 'sjlx': '数据类型', 'glbm': '管理部门'}
  192. for idx, field in enumerate(fields):
  193. data = stat_results.get(field, [])
  194. labels = [str(r[0]) for r in data]
  195. sizes = [r[1] for r in data]
  196. ax = axes[idx // 2][idx % 2]
  197. if field == 'ywlx':
  198. self.plot_donut(ax, labels, sizes, f"{field_names[field]} 分布")
  199. elif field == 'glbm':
  200. self.plot_line(ax, labels, sizes, f"{field_names[field]} 分布")
  201. else:
  202. self.plot_pie(ax, labels, sizes, f"{field_names[field]} 分布")
  203. size_labels = ['矢量数据', '栅格数据', '附件资料', '三维数据']
  204. original_keys = ['vector', 'raster', 'table', 'osgb']
  205. size_values = [round(size_map[key] / 1024 / 1024, 2) for key in original_keys]
  206. self.plot_bar(axes[1][1], size_labels, size_values, "数据类型大小 (MB)")
  207. plt.suptitle("数据字段分布与存储大小统计", fontsize=16)
  208. plt.tight_layout(rect=[0, 0.03, 1, 0.95])
  209. if export_charts:
  210. png_path = os.path.join(export_dir, f"data_storage_stats_{timestamp}.png")
  211. plt.savefig(png_path, dpi=300)
  212. feedback.pushInfo(f"图表已保存至:{png_path}")
  213. self.show_matplotlib_dialog(fig)
  214. return {'结果': '统计完成'}
  215. def get_table_name_candidates(self, table_ref):
  216. candidates = [table_ref]
  217. if '.' in table_ref:
  218. parts = table_ref.split('.')
  219. if len(parts) == 2:
  220. candidates.extend([
  221. f'"{parts[0]}"."{parts[1]}"',
  222. f'{parts[0]}."{parts[1]}"',
  223. f'"{parts[0]}".{parts[1]}'
  224. ])
  225. if '_' in table_ref:
  226. candidates.append(table_ref.split('_', 1)[1])
  227. if table_ref.startswith('hhht_'):
  228. candidates.append(table_ref[5:])
  229. return list(set(candidates))
  230. def plot_donut(self, ax, labels, sizes, title):
  231. if not sizes or sum(sizes) == 0:
  232. ax.axis('off')
  233. ax.set_title(f"{title}\n(无数据)")
  234. return
  235. if len(labels) > 6:
  236. others_sum = sum(sizes[5:])
  237. labels = labels[:5] + ["其他"]
  238. sizes = sizes[:5] + [others_sum]
  239. wedges, texts, autotexts = ax.pie(
  240. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  241. wedgeprops=dict(width=0.4), textprops={'fontsize': 9})
  242. for i, autotext in enumerate(autotexts):
  243. autotext.set_text(f'{sizes[i]}')
  244. ax.set_title(title)
  245. ax.axis('equal')
  246. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  247. def plot_pie(self, ax, labels, sizes, title):
  248. if not sizes or sum(sizes) == 0:
  249. ax.axis('off')
  250. ax.set_title(f"{title}\n(无数据)")
  251. return
  252. if len(labels) > 6:
  253. others_sum = sum(sizes[5:])
  254. labels = labels[:5] + ["其他"]
  255. sizes = sizes[:5] + [others_sum]
  256. wedges, texts, autotexts = ax.pie(
  257. sizes, labels=labels, autopct='%1.1f%%', startangle=90,
  258. textprops={'fontsize': 9})
  259. for i, autotext in enumerate(autotexts):
  260. autotext.set_text(f'{sizes[i]}')
  261. ax.set_title(title)
  262. ax.axis('equal')
  263. ax.legend(wedges, [f'{l}: {s}' for l, s in zip(labels, sizes)], loc="best", fontsize=8)
  264. def plot_line(self, ax, labels, sizes, title):
  265. if not sizes:
  266. ax.axis('off')
  267. ax.set_title(f"{title}\n(无数据)")
  268. return
  269. ax.plot(labels, sizes, marker='o', linestyle='-', color='teal')
  270. ax.set_title(title)
  271. ax.set_xlabel("管理部门")
  272. ax.set_ylabel("数量")
  273. for i, (x, y) in enumerate(zip(labels, sizes)):
  274. ax.annotate(f'{y}', (x, y), textcoords="offset points", xytext=(0, 5), ha='center', fontsize=9)
  275. ax.tick_params(axis='x', rotation=30)
  276. def plot_bar(self, ax, labels, sizes, title):
  277. if not sizes or sum(sizes) == 0:
  278. ax.axis('off')
  279. ax.set_title(f"{title}\n(无数据)")
  280. return
  281. bars = ax.bar(labels, sizes, color='cornflowerblue')
  282. ax.set_title(title)
  283. ax.set_xlabel('数据类型')
  284. ax.set_ylabel('大小 (MB)')
  285. for bar in bars:
  286. height = bar.get_height()
  287. ax.annotate(f'{height:.2f} MB',
  288. xy=(bar.get_x() + bar.get_width() / 2, height),
  289. xytext=(0, 3),
  290. textcoords="offset points",
  291. ha='center', va='bottom', fontsize=9)
  292. def show_matplotlib_dialog(self, fig):
  293. dialog = QDialog(iface.mainWindow())
  294. dialog.setWindowTitle("统计图表")
  295. layout = QVBoxLayout()
  296. canvas = FigureCanvas(fig)
  297. layout.addWidget(canvas)
  298. dialog.setLayout(layout)
  299. dialog.resize(1200, 900)
  300. dialog.exec_()
  301. def name(self):
  302. return 'DataStorageStatistics'
  303. def displayName(self):
  304. return '数据统计'
  305. def group(self):
  306. return '数据分析工具'
  307. def groupId(self):
  308. return 'data_storage_analysis'
  309. def createInstance(self):
  310. return DataStorageStatistics()