diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..be0cf26 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +reference/ +.venv/ diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/README.md b/README.md index 4600e58..205d82a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,24 @@ +
+ # Geo-Layout Transformer 🚀 **A Unified, Self-Supervised Foundation Model for Physical Design Analysis** @@ -124,6 +145,22 @@ The first step is to convert your GDSII/OASIS files into a graph dataset that th ``` This script will parse the GDS file, divide it into patches, construct a graph for each patch, and save the processed data as `.pt` files for efficient loading. +#### Polygon handling and per-patch graphs 🧩 + +When building a graph for each patch, we now preserve both global and per-patch (clipped) polygon information to robustly handle polygons spanning multiple patches: + +- Each geometry retains: + - **Global polygon**: vertices, bbox, area. + - **Clipped polygon(s)** in the patch: vertices (may be multiple fragments), area, and the **area ratio** (clipped/global). + - **is_partial** flag indicating cross-patch polygons. + - **Layer index** and the **patch bbox**. +- Node features include centroid, width/height from clipped shape (or global if no clip), clipped area, area ratio, layer id, and partial flag. +- Extra metadata is attached on the PyG `Data` object: + - `data.layer: LongTensor [num_nodes]` + - `data.node_meta: List[Dict]` with per-node global/clipped details (for visualization/debugging) + +This follows the spirit of LayoutGMN’s structural encoding while staying compatible with our GNN encoder. + ### 4.2. Stage 2: Model Training Once the dataset is ready, you can train the Geo-Layout Transformer. @@ -159,6 +196,17 @@ This project is ambitious and we welcome contributions. Our future roadmap inclu Please feel free to open an issue or submit a pull request. +## Acknowledgments + +We stand on the shoulders of open-source communities. 
This project draws inspiration and/or utilities from: + +- PyTorch and PyTorch Geometric for model building and graph learning +- gdstk/klayout for GDSII/OASIS parsing and geometry operations +- Scientific Python stack (NumPy, SciPy) for numerical robustness +- Research works such as LayoutGMN (graph matching for structural similarity) that informed our polygon/graph handling design + +If your work is used and not listed here, please open an issue or PR so we can properly credit you. + --- Made with ❤️ for EDA research and open-source collaboration. diff --git a/README_zh.md b/README_zh.md index af59e63..5b399b6 100644 --- a/README_zh.md +++ b/README_zh.md @@ -1,3 +1,24 @@ + + # Geo-Layout Transformer 🚀 **一个用于物理设计分析的统一、自监督基础模型** @@ -124,6 +145,22 @@ Geo-Layout-Transformer/ ``` 该脚本将解析 GDS 文件,将其划分为多个区块,为每个区块构建一个图,并将处理后的数据保存为 `.pt` 文件以便高效加载。 +#### 多边形处理与按区块建图 🧩 + +在为每个区块(patch)构建图时,我们同时保留多边形的全局信息和区块内(裁剪后)的信息,以稳健处理跨越多个区块的多边形: + +- 每个几何对象包含: + - **全局多边形**:顶点、外接框、面积。 + - **区块内裁剪多边形(可能多个片段)**:顶点、面积,以及 **面积占比**(裁剪/全局)。 + - **is_partial 标记**:指示是否跨区块。 + - **层索引** 与 **区块边界框**。 +- 节点特征包含:基于裁剪形状(若无则基于全局)的质心、宽/高、裁剪面积、面积占比、层 id、是否跨区块标志。 +- 额外元数据保存在 PyG `Data` 对象中: + - `data.layer: LongTensor [num_nodes]` + - `data.node_meta: List[Dict]`,含每个节点的全局/裁剪细节(用于可视化/调试) + +该设计借鉴了 LayoutGMN 的结构编码思想,同时与我们现有的 GNN 编码器保持兼容。 + ### 4.2. 
阶段二:模型训练 数据集准备就绪后,您就可以开始训练 Geo-Layout Transformer。 @@ -159,6 +196,17 @@ python main.py --config-file configs/default.yaml --mode pretrain --data-dir dat 欢迎随时提出 Issue 或提交 Pull Request。 +## 致谢 + +本项目离不开开源社区的贡献与启发,特别感谢: + +- PyTorch 与 PyTorch Geometric,为模型构建与图学习提供可靠基石 +- gdstk/klayout,为 GDSII/OASIS 的解析与几何操作提供高效能力 +- 科学计算生态(NumPy、SciPy),保障数值计算的稳定性 +- 研究工作 LayoutGMN(面向结构相似性的图匹配),启发了我们对多边形/图构建的设计 + +若您的工作被本项目使用但尚未列出,欢迎提交 Issue 或 PR 以便完善致谢。 + --- Made with ❤️ 面向 EDA 研究与开源协作。 diff --git a/pyproject.toml b/pyproject.toml index d97c6b9..ac5cede 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] -name = "layouttrans" +name = "geo-layout-transformer" version = "0.1.0" description = "Add your description here" readme = "README.md" -requires-python = ">=3.13" +requires-python = ">=3.12" dependencies = [] diff --git a/reference/.DS_Store b/reference/.DS_Store new file mode 100644 index 0000000..b320be5 Binary files /dev/null and b/reference/.DS_Store differ diff --git a/src/data/gds_parser.py b/src/data/gds_parser.py index d6350c2..a61993b 100644 --- a/src/data/gds_parser.py +++ b/src/data/gds_parser.py @@ -39,30 +39,76 @@ class GDSParser: return patches def extract_geometries_from_patch(self, patch_bbox: Tuple[float, float, float, float]) -> List[Dict]: - """从给定的区块中提取所有几何对象。 + """从给定的区块中提取所有几何对象,并记录全局与区块内(裁剪后)的信息。 + + 说明: + - 为了处理跨越多个区块的多边形,本函数会计算多边形与区块边界框的布尔相交, + 得到位于该区块内的裁剪多边形,并同时记录原始(全局)多边形信息。 Args: patch_bbox: 区块的边界框 (x_min, y_min, x_max, y_max)。 Returns: - 一个字典列表,每个字典代表一个几何对象及其属性(多边形、层、边界框)。 + 一个字典列表,每个字典代表一个几何对象及其属性: + - global_points: 原始多边形顶点(Nx2 ndarray) + - global_bbox: 原始多边形边界框 + - global_area: 原始多边形面积 + - clipped_points_list: 与区块相交后的裁剪多边形片段列表(每个片段为 Mx2 ndarray,列表可能为空) + - clipped_area: 裁剪后面积(可能为 0) + - area_ratio: 裁剪面积 / 原始面积(用于衡量跨区块比例) + - is_partial: 是否为跨区块(裁剪面积 < 原始面积) + - layer: 层映射到的整数索引 + - patch_bbox: 当前区块边界框 """ x_min, y_min, x_max, y_max = patch_bbox - # 获取单元内的所有多边形 + rect = gdstk.rectangle((x_min, y_min), (x_max, y_max)) polygons = 
self.top_cell.get_polygons(by_spec=True) - geometries = [] - # 遍历所有多边形 + geometries: List[Dict] = [] + for (layer, datatype), poly_list in polygons.items(): layer_str = f"{layer}/{datatype}" - # 只处理在 layer_mapping 中定义的层 - if layer_str in self.layer_mapping: - for poly in poly_list: - # 简单的边界框相交检查 - p_xmin, p_ymin, p_xmax, p_ymax = poly.bb() - if not (p_xmax < x_min or p_xmin > x_max or p_ymax < y_min or p_ymin > y_max): - geometries.append({ - "polygon": poly, - "layer": self.layer_mapping[layer_str], - "bbox": (p_xmin, p_ymin, p_xmax, p_ymax) - }) + if layer_str not in self.layer_mapping: + continue + layer_idx = self.layer_mapping[layer_str] + + for poly in poly_list: + p_xmin, p_ymin, p_xmax, p_ymax = poly.bb() + # 快速边界框测试(若无相交则跳过) + if p_xmax < x_min or p_xmin > x_max or p_ymax < y_min or p_ymin > y_max: + continue + + # 全局多边形点与面积 + global_points = np.array(poly.points, dtype=float) + global_area = abs(gdstk.Polygon(global_points).area()) + + # 与区块矩形做相交,可能返回多个多边形 + clipped = gdstk.boolean([poly], [rect], "and", precision=1e-3, layer=layer, datatype=datatype) + clipped_points_list: List[np.ndarray] = [] + clipped_area = 0.0 + if clipped: + for cpoly in clipped: + pts = np.array(cpoly.points, dtype=float) + if pts.size == 0: + continue + area = abs(gdstk.Polygon(pts).area()) + if area <= 0: + continue + clipped_points_list.append(pts) + clipped_area += area + + area_ratio = (clipped_area / global_area) if global_area > 0 else 0.0 + is_partial = area_ratio < 0.999 # 允许微小数值误差 + + geometries.append({ + "global_points": global_points, + "global_bbox": (p_xmin, p_ymin, p_xmax, p_ymax), + "global_area": float(global_area), + "clipped_points_list": clipped_points_list, # 可能包含多个裁剪片段 + "clipped_area": float(clipped_area), + "area_ratio": float(area_ratio), + "is_partial": bool(is_partial), + "layer": layer_idx, + "patch_bbox": (x_min, y_min, x_max, y_max), + }) + return geometries diff --git a/src/data/graph_constructor.py b/src/data/graph_constructor.py index 
147229a..9a2efe5 100644 --- a/src/data/graph_constructor.py +++ b/src/data/graph_constructor.py @@ -1,4 +1,4 @@ -from typing import List, Dict +from typing import List, Dict, Tuple import torch from torch_geometric.data import Data from scipy.spatial import cKDTree @@ -32,21 +32,64 @@ class GraphConstructor: if not geometries: return None - node_features = [] - node_positions = [] - # 提取每个几何图形的特征 - for geo in geometries: - x_min, y_min, x_max, y_max = geo["bbox"] - width = x_max - x_min - height = y_max - y_min - area = width * height - centroid_x = x_min + width / 2 - centroid_y = y_min + height / 2 + node_features: List[List[float]] = [] + node_positions: List[List[float]] = [] + node_layers: List[int] = [] + node_meta: List[Dict] = [] + + # 提取每个几何图形的特征(优先使用裁剪后片段的外接框中心;若无裁剪片段,则使用全局多边形的外接框中心) + for geo in geometries: + layer_idx: int = int(geo["layer"]) if "layer" in geo else 0 + global_bbox = geo.get("global_bbox", None) + global_points = geo.get("global_points", None) + clipped_points_list = geo.get("clipped_points_list", []) or [] + clipped_area = float(geo.get("clipped_area", 0.0)) + global_area = float(geo.get("global_area", 0.0)) + area_ratio = float(geo.get("area_ratio", 0.0)) + is_partial = bool(geo.get("is_partial", False)) + + # 选择用于节点位置与宽高的几何:若存在裁剪片段,聚合其外接框,否则用全局框 + if clipped_points_list: + # 合并所有裁剪片段点,计算整体外接框与质心 + all_pts = np.vstack(clipped_points_list) + elif global_points is not None: + all_pts = np.array(global_points, dtype=float) + else: + # 回退到 bbox 信息(兼容旧格式) + x_min, y_min, x_max, y_max = geo["bbox"] + all_pts = np.array([[x_min, y_min], [x_max, y_max]], dtype=float) + + x_min, y_min = np.min(all_pts, axis=0) + x_max, y_max = np.max(all_pts, axis=0) + width = float(x_max - x_min) + height = float(y_max - y_min) + centroid_x = float(x_min + width / 2.0) + centroid_y = float(y_min + height / 2.0) + + # 节点特征:外接框中心 (x, y)、宽、高、裁剪面积、面积占比(裁剪/全局)、层索引(数值化)、是否跨区块标志 + features = [ + centroid_x, + centroid_y, + width, + height, + clipped_area, + (clipped_area / global_area) if 
global_area > 0 else 0.0, + float(layer_idx), + 1.0 if is_partial else 0.0, + ] - # 特征包括:中心点坐标、宽度、高度、面积 - features = [centroid_x, centroid_y, width, height, area] node_features.append(features) node_positions.append([centroid_x, centroid_y]) + node_layers.append(layer_idx) + # 将原始与裁剪的必要元信息保存在 Data 中(以便后续可视化与调试) + node_meta.append({ + "layer": layer_idx, + "global_bbox": tuple(global_bbox) if global_bbox is not None else None, + "global_area": global_area, + "clipped_area": clipped_area, + "area_ratio": area_ratio, + "is_partial": is_partial, + }) # 将特征和位置转换为 PyTorch 张量 x = torch.tensor(node_features, dtype=torch.float) @@ -57,6 +100,9 @@ class GraphConstructor: # 创建图数据对象 data = Data(x=x, edge_index=edge_index, pos=pos, y=torch.tensor([label], dtype=torch.float)) + # 附加层索引与元信息(元信息以对象列表形式保存,供上层使用;不会参与张量运算) + data.layer = torch.tensor(node_layers, dtype=torch.long) + data.node_meta = node_meta return data def _create_edges(self, node_positions: torch.Tensor) -> torch.Tensor: