atomppgen.export 源代码

"""
赝势导出模块

提供多种格式的赝势文件导出功能:
- JSON:结构化元数据(参数、验证结果)
- NPZ:数值数据(径向网格、势能、波函数)
- UPF:Quantum ESPRESSO 兼容格式(实验性,当前仅提供最小可解析结构)

主要函数
--------
export_pseudopotential : 统一导出接口
"""

import json
import numpy as np
from pathlib import Path
from dataclasses import dataclass, asdict
from typing import Dict, List, Optional, Union
from datetime import datetime
import xml.etree.ElementTree as ET

from atomppgen.ae_atom import AEAtomResult
from atomppgen.tm import TMResult
from atomppgen.invert import InvertResult
from atomppgen.kb import KBResult
from atomppgen.validate import ValidationReport


__all__ = [
    "PseudopotentialData",
    "export_pseudopotential",
    "export_upf",
]


[文档] @dataclass class PseudopotentialData: """ 完整赝势数据包 聚合全电子解、TM伪化、势反演、验证报告等所有数据源, 用于统一格式导出。 Attributes ---------- Z : int 原子序数 symbol : str 元素符号(如 'Al') spin_mode : str 自旋模式('LDA' / 'GGA') xc_functional : str 交换关联泛函('PZ81' / 'VWN') generation_params : Dict 生成参数(TM参数、网格参数等) radial_grid : np.ndarray 径向网格,单位 Bohr radial_weights : np.ndarray 积分权重 ae_eigenvalues_by_l : Dict[int, np.ndarray] 全电子本征值(按角动量 l 索引),单位 Hartree ae_wavefunctions_by_l : Dict[int, List[np.ndarray]] 全电子径向波函数(u = r·R) pseudo_wavefunctions_by_l : Dict[int, np.ndarray] 赝波函数(伪化后的价电子态) semilocal_potentials_by_l : Dict[int, np.ndarray] 半局域势 V_l(r),单位 Hartree kb_loc_channel : Optional[int] KB 局域通道角动量量子数 l*(可选) kb_V_loc : Optional[np.ndarray] KB 局域势 V_loc(r),单位 Hartree(可选) kb_beta_l : Optional[Dict[int, np.ndarray]] KB 投影子 {l: β_l(r)}(可选,已归一化) kb_D_l : Optional[Dict[int, float]] KB 耦合系数 {l: D_l}(可选,Hartree) validation_report : ValidationReport 完整验证报告 generation_date : str 生成时间戳(ISO 8601格式) code_version : str 代码版本号 git_commit : Optional[str] Git 提交哈希(可选) """ Z: int symbol: str spin_mode: str xc_functional: str generation_params: Dict radial_grid: np.ndarray radial_weights: np.ndarray ae_eigenvalues_by_l: Dict[int, np.ndarray] ae_wavefunctions_by_l: Dict[int, List[np.ndarray]] pseudo_wavefunctions_by_l: Dict[int, np.ndarray] semilocal_potentials_by_l: Dict[int, np.ndarray] validation_report: ValidationReport generation_date: str kb_loc_channel: Optional[int] = None kb_V_loc: Optional[np.ndarray] = None kb_beta_l: Optional[Dict[int, np.ndarray]] = None kb_D_l: Optional[Dict[int, float]] = None code_version: str = "0.1.0" git_commit: Optional[str] = None
def _export_json( data: PseudopotentialData, output_path: Path, include_arrays: bool = False, ) -> None: """ 导出JSON格式(结构化元数据) Parameters ---------- data : PseudopotentialData 完整赝势数据 output_path : Path 输出文件路径 include_arrays : bool, default=False 是否包含数值数组(大文件警告)。 若 False,数组数据仅记录形状和范围。 Notes ----- JSON格式优先存储元数据(参数、验证结果),数值数据建议使用NPZ格式。 输出单位约定: - 能量:Hartree - 长度:Bohr """ output_path = Path(output_path) # 构建JSON数据结构 json_data = { "metadata": { "element": { "Z": data.Z, "symbol": data.symbol, }, "xc": { "functional": data.xc_functional, "spin_mode": data.spin_mode, }, "generation_date": data.generation_date, "code": { "name": "AtomPPGen", "version": data.code_version, }, "units": "Hartree_atomic", # 明确标注单位 }, "generation_params": data.generation_params, "all_electron_reference": { "eigenvalues_Ha": { f"l={l}": eps.tolist() for l, eps in data.ae_eigenvalues_by_l.items() }, }, "pseudopotential": { "channels": list(data.semilocal_potentials_by_l.keys()), "data_file": str(output_path.with_suffix('.npz').name), "note": "Numerical arrays stored in NPZ format for efficiency", }, "validation_report": data.validation_report.to_dict(), } if data.kb_V_loc is not None: json_data["pseudopotential"]["kb"] = { "loc_channel": int(data.kb_loc_channel) if data.kb_loc_channel is not None else None, "nonlocal_channels": sorted(int(l) for l in (data.kb_beta_l or {}).keys()), "note": "KB arrays (V_loc, beta_l, D_l) are stored in NPZ", } # 可选:包含git commit if data.git_commit: json_data["metadata"]["code"]["git_commit"] = data.git_commit # 可选:包含数组数据(仅用于调试/小数据集) if include_arrays: json_data["radial_grid"] = { "grid_Bohr": data.radial_grid.tolist(), "weights": data.radial_weights.tolist(), } json_data["pseudopotential"]["semilocal_potentials_Ha"] = { f"l={l}": V.tolist() for l, V in data.semilocal_potentials_by_l.items() } else: # 仅记录数组形状和范围 json_data["radial_grid"] = { "n_points": int(len(data.radial_grid)), "r_min_Bohr": float(data.radial_grid[0]), "r_max_Bohr": float(data.radial_grid[-1]), } # 确保输出目录存在 output_path.parent.mkdir(parents=True, exist_ok=True) # 写入JSON文件 with output_path.open('w', encoding='utf-8') as f: json.dump(json_data, f, indent=2, ensure_ascii=False) def _export_npz( data: PseudopotentialData, output_path: Path, ) -> None: """ 导出NPZ格式(压缩数值数据) Parameters ---------- data : PseudopotentialData 完整赝势数据 output_path : Path 输出文件路径 Notes ----- NPZ格式存储所有数值数组,使用压缩节省空间。 数组命名约定: - 'radial_grid':径向网格 (Bohr) - 'ae_eigenvalues_l{l}':全电子本征值 (Ha) - 'ae_wavefunction_l{l}_n{n}':全电子波函数 u(r) - 'ps_wavefunction_l{l}':赝波函数(价电子) - 'semilocal_potential_l{l}':半局域势 V_l(r) (Ha) 输出单位约定: - 能量:Hartree - 长度:Bohr """ output_path = Path(output_path) # 构建NPZ数据字典 npz_data = { # 网格 'radial_grid': data.radial_grid, 'radial_weights': data.radial_weights, # 元数据(标量) 'Z': data.Z, 'xc_code': _xc_to_code(data.xc_functional), } # 全电子本征值和波函数 for l, eigenvalues in data.ae_eigenvalues_by_l.items(): npz_data[f'ae_eigenvalues_l{l}'] = eigenvalues # 仅保存价电子波函数(最后一个n) if l in data.ae_wavefunctions_by_l: wavefunctions = data.ae_wavefunctions_by_l[l] if len(wavefunctions) > 0: n_valence = len(eigenvalues) # 价电子是最高占据态 npz_data[f'ae_wavefunction_l{l}_n{n_valence}'] = wavefunctions[-1] # 赝势数据 for l, wf in data.pseudo_wavefunctions_by_l.items(): npz_data[f'ps_wavefunction_l{l}'] = wf for l, V in data.semilocal_potentials_by_l.items(): npz_data[f'semilocal_potential_l{l}'] = V # KB 数据(可选) if data.kb_V_loc is not None: npz_data["kb_V_loc"] = data.kb_V_loc if data.kb_loc_channel is not None: npz_data["kb_loc_channel"] = int(data.kb_loc_channel) if data.kb_beta_l: for l, beta in data.kb_beta_l.items(): npz_data[f"kb_beta_l{int(l)}"] = beta if data.kb_D_l: for l, D in data.kb_D_l.items(): npz_data[f"kb_D_l{int(l)}"] = float(D) # 确保输出目录存在 output_path.parent.mkdir(parents=True, exist_ok=True) # 使用压缩保存 np.savez_compressed(output_path, **npz_data) def _xc_to_code(xc_functional: str) -> int: """ 交换关联泛函名称转换为数值代码 Parameters ---------- xc_functional : str 泛函名称(如 'PZ81', 'VWN') Returns ------- int 数值代码(1=PZ81, 2=VWN, ...) """ xc_map = { 'PZ81': 1, 'VWN': 2, 'PW91': 3, 'PBE': 4, } return xc_map.get(xc_functional.upper(), 0) def _infer_z_valence(Z: int) -> Optional[float]: """ 尽量推断常见元素的价电子数(用于 UPF header)。 Notes ----- 这是教学用途的保守映射。更严格的做法应由“芯态选择/价电子配置”显式给出。 """ mapping = { 11: 1.0, # Na: 3s^1 13: 3.0, # Al: 3s^2 3p^1 14: 4.0, # Si: 3s^2 3p^2 } return mapping.get(int(Z)) def _format_float_array(values: np.ndarray, per_line: int = 6) -> str: values = np.asarray(values).ravel() lines = [] for i in range(0, len(values), per_line): chunk = values[i:i + per_line] lines.append(" ".join(f"{float(x):.12e}" for x in chunk)) return "\n".join(lines)
[文档] def export_upf( data: PseudopotentialData, output_path: Union[str, Path], metadata: Optional[Dict] = None, ) -> Path: """ 导出 UPF v2(实验性) 当前目标是提供“最小可解析、字段可追溯”的 UPF 结构,便于后续逐步对齐 QE 的严格要求。 若要获得严格可用于 Quantum ESPRESSO 的 UPF,请关注后续版本更新。 Parameters ---------- data : PseudopotentialData 完整赝势数据包 output_path : str | Path 输出文件路径(建议后缀为 .upf) metadata : dict, optional 额外信息;UPF 需要的关键字段可在此给出: - z_valence : float(建议显式提供) Returns ------- Path 生成的 UPF 文件路径 """ output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) _export_upf(data, output_path, metadata=metadata) return output_path
def _export_upf( data: PseudopotentialData, output_path: Path, metadata: Optional[Dict] = None, ) -> None: z_valence = None if metadata and metadata.get("z_valence") is not None: z_valence = float(metadata["z_valence"]) else: z_valence = _infer_z_valence(int(data.Z)) if z_valence is None: raise ValueError("UPF 导出需要提供 z_valence(可在 metadata['z_valence'] 指定)") # UPF 传统上使用 Ry 能量单位(1 Ha = 2 Ry) ha_to_ry = 2.0 r = np.asarray(data.radial_grid, dtype=float) rab = np.gradient(r) root = ET.Element("UPF", {"version": "2.0.1"}) info = ET.SubElement(root, "PP_INFO") info.text = ( "Generated by AtomPPGen (experimental UPF writer).\n" "This file is intended to be a traceable intermediate format; QE compatibility is not guaranteed yet." ) ET.SubElement( root, "PP_HEADER", { "generated": "AtomPPGen", "element": str(data.symbol), "pseudo_type": "NC", "relativistic": "nonrelativistic", "functional": str(data.xc_functional), "z_valence": f"{z_valence:.1f}", "l_max": str(int(max(data.semilocal_potentials_by_l.keys())) if data.semilocal_potentials_by_l else 0), "mesh_size": str(int(len(r))), "has_kb": "T" if data.kb_V_loc is not None else "F", "units": "bohr_ry", }, ) mesh = ET.SubElement(root, "PP_MESH") pp_r = ET.SubElement(mesh, "PP_R") pp_r.text = _format_float_array(r) pp_rab = ET.SubElement(mesh, "PP_RAB") pp_rab.text = _format_float_array(rab) if data.kb_V_loc is not None and data.kb_beta_l is not None and data.kb_D_l is not None: pp_local = ET.SubElement(root, "PP_LOCAL") pp_local.text = _format_float_array(np.asarray(data.kb_V_loc) * ha_to_ry) nonlocal_el = ET.SubElement(root, "PP_NONLOCAL") beta_items = sorted((int(l), np.asarray(beta)) for l, beta in data.kb_beta_l.items()) dij_diag = [] for idx, (l, beta) in enumerate(beta_items, start=1): beta_el = ET.SubElement( nonlocal_el, "PP_BETA", { "index": str(idx), "angular_momentum": str(int(l)), "cutoff_radius_index": "0", }, ) beta_el.text = _format_float_array(beta) D = float(data.kb_D_l[int(l)]) dij_diag.append(D * ha_to_ry) dij_el = ET.SubElement(nonlocal_el, "PP_DIJ") nproj = len(dij_diag) dij_matrix = np.zeros((nproj, nproj), dtype=float) for i in range(nproj): dij_matrix[i, i] = dij_diag[i] dij_el.text = _format_float_array(dij_matrix.ravel(), per_line=6) else: semilocal = ET.SubElement(root, "PP_SEMILOCAL") for l in sorted(int(k) for k in data.semilocal_potentials_by_l.keys()): vl = ET.SubElement(semilocal, "PP_VL", {"angular_momentum": str(int(l))}) vl.text = _format_float_array(np.asarray(data.semilocal_potentials_by_l[l]) * ha_to_ry) tree = ET.ElementTree(root) tree.write(output_path, encoding="utf-8", xml_declaration=True)
[文档] def export_pseudopotential( ae_result: AEAtomResult, tm_dict: Dict[int, TMResult], inv_dict: Dict[int, InvertResult], validation_report: ValidationReport, output_prefix: str, kb_result: Optional[KBResult] = None, formats: List[str] = ['json', 'npz'], metadata: Optional[Dict] = None, ) -> List[Path]: """ 导出赝势到多种格式 Parameters ---------- ae_result : AEAtomResult 全电子原子解 tm_dict : Dict[int, TMResult] 各通道TM伪化结果(按角动量 l 索引) inv_dict : Dict[int, InvertResult] 各通道势反演结果(按角动量 l 索引) validation_report : ValidationReport 完整验证报告 output_prefix : str 输出文件名前缀(如 'outputs/al_lda') formats : List[str], default=['json', 'npz'] 导出格式列表,支持 'json', 'npz', 'upf' metadata : Optional[Dict], default=None 额外元数据(如 git_commit, 备注) Returns ------- List[Path] 生成的文件路径列表 Raises ------ ValueError 若 tm_dict 和 inv_dict 的 l 通道不一致 Examples -------- >>> files = export_pseudopotential( ... ae, tm_dict, inv_dict, report, ... output_prefix='outputs/al_lda', ... formats=['json', 'npz'] ... ) >>> print(files) [PosixPath('outputs/al_lda.json'), PosixPath('outputs/al_lda.npz')] Notes ----- 输出单位约定: - 能量:Hartree 原子单位 - 长度:Bohr JSON格式包含元数据和验证报告,NPZ格式包含数值数组。 推荐同时导出JSON+NPZ以获得完整数据集。 """ # 输入验证:检查l通道一致性 if set(tm_dict.keys()) != set(inv_dict.keys()): raise ValueError( f"TM和势反演的l通道不一致: tm={set(tm_dict.keys())}, inv={set(inv_dict.keys())}" ) # 构建完整数据包 pp_data = PseudopotentialData( Z=ae_result.Z, symbol=_get_element_symbol(ae_result.Z), spin_mode="LDA", # 赝势生成固定使用 LDA(自旋无关势) xc_functional=ae_result.xc, # 直接使用 AEAtomResult 的 xc 字段 generation_params=_collect_generation_params(ae_result, tm_dict), radial_grid=ae_result.r, radial_weights=ae_result.w, ae_eigenvalues_by_l=ae_result.eps_by_l, ae_wavefunctions_by_l=ae_result.u_by_l, pseudo_wavefunctions_by_l={l: tm.u_ps for l, tm in tm_dict.items()}, semilocal_potentials_by_l={l: inv.V_l for l, inv in inv_dict.items()}, kb_loc_channel=kb_result.loc_channel if kb_result is not None else None, kb_V_loc=kb_result.V_loc if kb_result is not None else None, kb_beta_l=kb_result.beta_l if kb_result is not None else None, kb_D_l=kb_result.D_l if kb_result is not None else None, validation_report=validation_report, generation_date=datetime.now().isoformat(), code_version="0.1.0", git_commit=metadata.get('git_commit') if metadata else None, ) # 导出到各种格式 output_prefix = Path(output_prefix) output_files = [] for fmt in formats: if fmt.lower() == 'json': json_path = output_prefix.with_suffix('.json') _export_json(pp_data, json_path) output_files.append(json_path) elif fmt.lower() == 'npz': npz_path = output_prefix.with_suffix('.npz') _export_npz(pp_data, npz_path) output_files.append(npz_path) elif fmt.lower() == 'upf': upf_path = output_prefix.with_suffix('.upf') export_upf(pp_data, upf_path, metadata=metadata) output_files.append(upf_path) else: print(f"警告:未知格式 '{fmt}',跳过") return output_files
def _get_element_symbol(Z: int) -> str: """根据原子序数获取元素符号""" symbols = { 1: 'H', 2: 'He', 3: 'Li', 4: 'Be', 5: 'B', 6: 'C', 7: 'N', 8: 'O', 9: 'F', 10: 'Ne', 11: 'Na', 12: 'Mg', 13: 'Al', 14: 'Si', 15: 'P', 16: 'S', 17: 'Cl', 18: 'Ar', } return symbols.get(Z, f'Z{Z}') def _infer_xc_functional(ae_result: AEAtomResult) -> str: """从AE结果推断XC泛函名称(已弃用,直接使用 ae_result.xc)""" return ae_result.xc def _collect_generation_params( ae_result: AEAtomResult, tm_dict: Dict[int, TMResult], ) -> Dict: """收集生成参数(TM参数、网格参数等)""" # TM参数(从各通道提取rc) tm_params = { 'rc_by_l': {l: float(tm.rc) for l, tm in tm_dict.items()}, 'continuity_order': 2, # TM固定为2阶 } # 网格参数(从AE结果推断) grid_params = { 'type': 'exp_transformed', # 假设使用exp变换网格 'n_points': len(ae_result.r), 'r_max_Bohr': float(ae_result.r[-1]), } return { 'tm_pseudization': tm_params, 'radial_grid': grid_params, }