{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,4,23]],"date-time":"2026-04-23T07:56:19Z","timestamp":1776930979387,"version":"3.51.2"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,11,16]]},"DOI":"10.1145\/3712285.3759772","type":"proceedings-article","created":{"date-parts":[[2025,11,12]],"date-time":"2025-11-12T16:04:47Z","timestamp":1762963487000},"page":"991-1005","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":2,"title":["TurboFNO: High-Performance Fourier Neural Operator with Fused FFT-GEMM-iFFT on GPU"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0000-0001-8637-3307","authenticated-orcid":false,"given":"Shixun","family":"Wu","sequence":"first","affiliation":[{"name":"University of California, Riverside, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-2688-8058","authenticated-orcid":false,"given":"Yujia","family":"Zhai","sequence":"additional","affiliation":[{"name":"University of California, Riverside, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0002-9628-3326","authenticated-orcid":false,"given":"Huangliang","family":"Dai","sequence":"additional","affiliation":[{"name":"University of California Riverside, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0009-2273-5618","authenticated-orcid":false,"given":"Yue","family":"Zhu","sequence":"additional","affiliation":[{"name":"University of California, Riverside, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0009-0005-8299-6124","authenticated-orcid":false,"given":"Haiyang","family":"Hu","sequence":"additional","affiliation":[{"name":"University of California, Riverside, Riverside, USA"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-2578-4940","authenticated-orcid":false,"given":"Zizhong","family":"Chen","sequence":"additional","affiliation":[{"name":"University of California, Riverside, Riverside, USA"}]}],"member":"320","published-online":{"date-parts":[[2025,11,15]]},"reference":[{"key":"e_1_3_3_3_2_2","unstructured":"Retrieved in 2024. NVIDIA CUFFT Documentation. https:\/\/docs.nvidia.com\/cuda\/cufft. Online."},{"key":"e_1_3_3_3_3_2","doi-asserted-by":"publisher","DOI":"10.1142\/9789812777072_0001"},{"key":"e_1_3_3_3_4_2","doi-asserted-by":"crossref","unstructured":"Alberto Castro Heiko Appel Micael Oliveira Carlo\u00a0A Rozzi Xavier Andrade Florian Lorenzen Miguel\u00a0AL Marques EKU Gross and Angel Rubio. 2006. Octopus: a tool for the application of time-dependent density functional theory. physica status solidi (b) 243 11 (2006) 2465\u20132488.","DOI":"10.1002\/pssb.200642067"},{"key":"e_1_3_3_3_5_2","unstructured":"Ciprian Chelba Mia Chen Ankur Bapna and Noam Shazeer. 2020. Faster transformer decoding: N-gram masked self-attention. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2001.04589 (2020)."},{"key":"e_1_3_3_3_6_2","doi-asserted-by":"crossref","unstructured":"Hanning Chen Jeffrey\u00a0M McMahon Mark\u00a0A Ratner and George\u00a0C Schatz. 2010. Classical electrodynamics coupled to quantum mechanics for calculation of molecular optical properties: a RT-TDDFT\/FDTD approach. The Journal of Physical Chemistry C 114 34 (2010) 14384\u201314392.","DOI":"10.1021\/jp1043392"},{"key":"e_1_3_3_3_7_2","first-page":"578","volume-title":"13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18)","author":"Chen Tianqi","year":"2018","unstructured":"Tianqi Chen, Thierry Moreau, Ziheng Jiang, Lianmin Zheng, Eddie Yan, Haichen Shen, Meghan Cowan, Leyuan Wang, Yuwei Hu, Luis Ceze, et\u00a0al. 2018. { TVM} : An automated { End-to-End} optimizing compiler for deep learning. In 13th USENIX Symposium on Operating Systems Design and Implementation (OSDI 18). 578\u2013594."},{"key":"e_1_3_3_3_8_2","doi-asserted-by":"crossref","unstructured":"Xiaopo Cheng Christina Caruso Wilbur\u00a0A Lam and Michael\u00a0D Graham. 2023. Marginated aberrant red blood cells induce pathologic vascular stress fluctuations in a computational model of hematologic disorders. Science Advances 9 48 (2023) eadj6423.","DOI":"10.1126\/sciadv.adj6423"},{"key":"e_1_3_3_3_9_2","doi-asserted-by":"crossref","unstructured":"Xiaopo Cheng Christina Caruso Wilbur\u00a0A Lam and Michael\u00a0D Graham. 2025. Red blood cell partitioning and segregation through vascular bifurcations in a model of sickle cell disease. Soft Matter (2025).","DOI":"10.1039\/D4SM01519C"},{"key":"e_1_3_3_3_10_2","unstructured":"Huangliang Dai Shixun Wu Hairui Zhao Jiajun Huang Zizhe Jian Yue Zhu Haiyang Hu and Zizhong Chen. 2025. FT-Transformer: Resilient and Reliable Transformer with End-to-End Fault Tolerant Attention. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2504.02211 (2025)."},{"key":"e_1_3_3_3_11_2","doi-asserted-by":"crossref","unstructured":"Tri Dao Dan Fu Stefano Ermon Atri Rudra and Christopher R\u00e9. 2022. Flashattention: Fast and memory-efficient exact attention with io-awareness. Advances in neural information processing systems 35 (2022) 16344\u201316359.","DOI":"10.52202\/068431-1189"},{"key":"e_1_3_3_3_12_2","unstructured":"Daniel\u00a0Y Fu Hermann Kumbong Eric Nguyen and Christopher R\u00e9. 2023. Flashfftconv: Efficient convolutions for long sequences with tensor cores. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2311.05908 (2023)."},{"key":"e_1_3_3_3_13_2","doi-asserted-by":"crossref","unstructured":"Chao Gao and Sai\u00a0Qian Zhang. 2024. Dlora: Distributed parameter-efficient fine-tuning solution for large language model. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2404.05182 (2024).","DOI":"10.18653\/v1\/2024.findings-emnlp.802"},{"key":"e_1_3_3_3_14_2","unstructured":"Sahar\u00a0Ghoflsaz Ghinani Jingyao Zhang and Elaheh Sadredini. 2025. Enabling Low-Cost Secure Computing on Untrusted In-Memory Architectures. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2501.17292 (2025)."},{"key":"e_1_3_3_3_15_2","doi-asserted-by":"crossref","unstructured":"Paolo Giannozzi Stefano Baroni Nicola Bonini Matteo Calandra Roberto Car Carlo Cavazzoni Davide Ceresoli Guido\u00a0L Chiarotti Matteo Cococcioni Ismaila Dabo et\u00a0al. 2009. QUANTUM ESPRESSO: a modular and open-source software project for quantumsimulations of materials. Journal of physics: Condensed matter 21 39 (2009) 395502.","DOI":"10.1088\/0953-8984\/21\/39\/395502"},{"key":"e_1_3_3_3_16_2","doi-asserted-by":"crossref","unstructured":"Stefan Goedecker. 1999. Linear scaling electronic structure methods. Reviews of Modern Physics 71 4 (1999) 1085.","DOI":"10.1103\/RevModPhys.71.1085"},{"key":"e_1_3_3_3_17_2","doi-asserted-by":"publisher","DOI":"10.1145\/2503210.2504566"},{"key":"e_1_3_3_3_18_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710897"},{"key":"e_1_3_3_3_19_2","unstructured":"Zeyu Han Chao Gao Jinyang Liu Sai\u00a0Qian Zhang et\u00a0al. 2024. Parameter-efficient fine-tuning for large models: A comprehensive survey. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2403.14608 (2024)."},{"key":"e_1_3_3_3_20_2","doi-asserted-by":"publisher","DOI":"10.1109\/BigData59044.2023.10386386"},{"key":"e_1_3_3_3_21_2","doi-asserted-by":"crossref","unstructured":"J\u00fcrg Hutter Marcella Iannuzzi Florian Schiffmann and Joost VandeVondele. 2014. cp2k: atomistic simulations of condensed matter systems. Wiley Interdisciplinary Reviews: Computational Molecular Science 4 1 (2014) 15\u201325.","DOI":"10.1002\/wcms.1159"},{"key":"e_1_3_3_3_22_2","doi-asserted-by":"publisher","DOI":"10.1109\/ICCD63220.2024.00023"},{"key":"e_1_3_3_3_23_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS57955.2024.00044"},{"key":"e_1_3_3_3_24_2","doi-asserted-by":"crossref","unstructured":"Jeremy Johnston Xiao-Yang Liu Shixun Wu and Xiaodong Wang. 2023. A curriculum learning approach to optimization with application to downlink beamforming. IEEE Transactions on Signal Processing 72 (2023) 84\u201398.","DOI":"10.1109\/TSP.2023.3334396"},{"key":"e_1_3_3_3_25_2","doi-asserted-by":"publisher","DOI":"10.1109\/Allerton58177.2023.10313512"},{"key":"e_1_3_3_3_26_2","doi-asserted-by":"crossref","unstructured":"Gabriel Kotliar Sergej\u00a0Y Savrasov Kristjan Haule Viktor\u00a0S Oudovenko O Parcollet and CA Marianetti. 2006. Electronic structure calculations with dynamical mean-field theory. Reviews of Modern Physics 78 3 (2006) 865\u2013951.","DOI":"10.1103\/RevModPhys.78.865"},{"key":"e_1_3_3_3_27_2","doi-asserted-by":"crossref","unstructured":"Hongliang Li Hairui Zhao Ting Sun Xiang Li Haixiao Xu and Keqin Li. 2024. Interference-aware opportunistic job placement for shared distributed deep learning clusters. J. Parallel and Distrib. Comput. 183 (2024) 104776.","DOI":"10.1016\/j.jpdc.2023.104776"},{"key":"e_1_3_3_3_28_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER52292.2023.00022"},{"key":"e_1_3_3_3_29_2","volume-title":"2021 Fall Western Sectional Meeting","author":"Li Zongyi","year":"2021","unstructured":"Zongyi Li. 2021. Neural operator: Learning maps between function spaces. In 2021 Fall Western Sectional Meeting. AMS."},{"key":"e_1_3_3_3_30_2","unstructured":"Zongyi Li Nikola Kovachki Kamyar Azizzadenesheli Burigede Liu Kaushik Bhattacharya Andrew Stuart and Anima Anandkumar. 2020. Fourier neural operator for parametric partial differential equations. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2010.08895 (2020)."},{"key":"e_1_3_3_3_31_2","unstructured":"Shaolin Liao. 2006. The Taylor Interpolation through FFT Algorithm for Electromagnetic Wave Propagation and Scattering. arXiv preprint physics\/0610057 (2006)."},{"key":"e_1_3_3_3_32_2","doi-asserted-by":"crossref","unstructured":"Jinyang Liu Sheng Di Kai Zhao Xin Liang Sian Jin Zizhe Jian Jiajun Huang Shixun Wu Zizhong Chen and Franck Cappello. 2024. High-performance effective scientific error-bounded lossy compression with auto-tuned multi-component interpolation. Proceedings of the ACM on Management of Data 2 1 (2024) 1\u201327.","DOI":"10.1145\/3639259"},{"key":"e_1_3_3_3_33_2","doi-asserted-by":"publisher","DOI":"10.1109\/SC41406.2024.00019"},{"key":"e_1_3_3_3_34_2","volume-title":"ICLR 2023 Workshop on Physics for Machine Learning","author":"Liu Xiao-Yang","year":"2023","unstructured":"Xiao-Yang Liu, Zechu Li, Shixun Wu, and Xiaodong Wang. 2023. Stationary deep reinforcement learning with quantum k-spin hamiltonian regularization. In ICLR 2023 Workshop on Physics for Machine Learning."},{"key":"e_1_3_3_3_35_2","doi-asserted-by":"crossref","unstructured":"Takahiro Murashima Shingo Urata and Shaofan Li. 2019. Coupling finite element method with large scale atomic\/molecular massively parallel simulator (LAMMPS) for hierarchical multiscale simulations: Modeling and simulation of amorphous polymeric materials. The European Physical Journal B 92 (2019) 1\u20138.","DOI":"10.1140\/epjb\/e2019-100105-9"},{"key":"e_1_3_3_3_36_2","volume-title":"cuBLAS Library","author":"Corporation NVIDIA","year":"2023","unstructured":"NVIDIA Corporation. 2023. cuBLAS Library. NVIDIA. https:\/\/docs.nvidia.com\/cuda\/cublas\/index.html."},{"key":"e_1_3_3_3_37_2","volume-title":"cuFFT Library","author":"Corporation NVIDIA","year":"2023","unstructured":"NVIDIA Corporation. 2023. cuFFT Library. NVIDIA. https:\/\/docs.nvidia.com\/cuda\/cufft\/index.html."},{"key":"e_1_3_3_3_38_2","volume-title":"TensorRT: High Performance Deep Learning Inference Optimizer and Runtime","author":"Corporation NVIDIA","year":"2023","unstructured":"NVIDIA Corporation. 2023. TensorRT: High Performance Deep Learning Inference Optimizer and Runtime. NVIDIA. https:\/\/developer.nvidia.com\/tensorrt."},{"key":"e_1_3_3_3_39_2","unstructured":"A Paszke. 2019. Pytorch: An imperative style high-performance deep learning library. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1912.01703 (2019)."},{"key":"e_1_3_3_3_40_2","unstructured":"Jaideep Pathak Shashank Subramanian Peter Harrington Sanjeev Raja Ashesh Chattopadhyay Morteza Mardani Thorsten Kurth David Hall Zongyi Li Kamyar Azizzadenesheli et\u00a0al. 2022. Fourcastnet: A global data-driven high-resolution weather model using adaptive fourier neural operators. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2202.11214 (2022)."},{"key":"e_1_3_3_3_41_2","volume-title":"FFTs for (mostly) Particle Codes within the DOE Exascale Computing Project.","author":"Plimpton Steven\u00a0J","year":"2017","unstructured":"Steven\u00a0J Plimpton. 2017. FFTs for (mostly) Particle Codes within the DOE Exascale Computing Project. Technical Report. Sandia National Lab.(SNL-NM), Albuquerque, NM (United States)."},{"key":"e_1_3_3_3_42_2","doi-asserted-by":"publisher","DOI":"10.1145\/3394486.3406703"},{"key":"e_1_3_3_3_43_2","doi-asserted-by":"crossref","unstructured":"Alberto Scotti and Ugo Piomelli. 2002. Turbulence models in pulsating flows. AIAA journal 40 3 (2002) 537\u2013544.","DOI":"10.2514\/2.1679"},{"key":"e_1_3_3_3_44_2","unstructured":"Mohammad Shoeybi Mostofa Patwary Raul Puri Patrick LeGresley Jared Casper and Bryan Catanzaro. 2019. Megatron-lm: Training multi-billion parameter language models using model parallelism. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/1909.08053 (2019)."},{"key":"e_1_3_3_3_45_2","doi-asserted-by":"publisher","DOI":"10.1145\/3315508.3329973"},{"key":"e_1_3_3_3_46_2","doi-asserted-by":"crossref","unstructured":"Dmitrii Tolmachev. 2023. VkFFT-a performant cross-platform and open-source GPU FFT library. IEEE Access 11 (2023) 12039\u201312058.","DOI":"10.1109\/ACCESS.2023.3242240"},{"key":"e_1_3_3_3_47_2","volume-title":"Turbulence modeling for CFD","author":"Wilcox David\u00a0C","year":"1998","unstructured":"David\u00a0C Wilcox et\u00a0al. 1998. Turbulence modeling for CFD. Vol.\u00a02. DCW industries La Canada, CA."},{"key":"e_1_3_3_3_48_2","doi-asserted-by":"publisher","DOI":"10.1109\/CLUSTER59578.2024.00035"},{"key":"e_1_3_3_3_49_2","unstructured":"Shixun Wu Jinwen Pan Jinyang Liu Jiannan Tian Ziwei Qiu Jiajun Huang Kai Zhao Xin Liang Sheng Di Zizhong Chen et\u00a0al. 2025. Boosting Scientific Error-Bounded Lossy Compression through Optimized Synergistic Lossy-Lossless Orchestration. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2507.11165 (2025)."},{"key":"e_1_3_3_3_50_2","unstructured":"Shixun Wu Krishnan Raghavan Sheng Di Zizhong Chen and Franck Cappello. 2024. DGRO: Diameter-Guided Ring Optimization for Integrated Research Infrastructure Membership. arXiv preprint arXiv:https:\/\/arXiv.org\/abs\/2410.11142 (2024)."},{"key":"e_1_3_3_3_51_2","doi-asserted-by":"publisher","DOI":"10.1145\/3588195.3595947"},{"key":"e_1_3_3_3_52_2","doi-asserted-by":"publisher","DOI":"10.1145\/3710848.3710853"},{"key":"e_1_3_3_3_53_2","doi-asserted-by":"publisher","DOI":"10.1145\/3577193.3593715"},{"key":"e_1_3_3_3_54_2","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS54959.2023.00042"},{"key":"e_1_3_3_3_55_2","doi-asserted-by":"publisher","DOI":"10.23919\/DATE51398.2021.9474215"},{"key":"e_1_3_3_3_56_2","doi-asserted-by":"publisher","DOI":"10.1109\/DAC56929.2023.10247691"},{"key":"e_1_3_3_3_57_2","doi-asserted-by":"publisher","DOI":"10.1145\/3531437.3539699"},{"key":"e_1_3_3_3_58_2","doi-asserted-by":"publisher","DOI":"10.1145\/3508352.3549381"},{"key":"e_1_3_3_3_59_2","doi-asserted-by":"publisher","DOI":"10.1109\/INFOCOM55648.2025.11044666"},{"key":"e_1_3_3_3_60_2","first-page":"440","volume-title":"CCF International Conference on Natural Language Processing and Chinese Computing","author":"Zhao Hairui","year":"2024","unstructured":"Hairui Zhao, Xinyu Li, and Hongliang Li. 2024. Visage: Visual-Aware Generation of Adversarial Examples in Black-Box for Text Classification. In CCF International Conference on Natural Language Processing and Chinese Computing. Springer, 440\u2013453."},{"key":"e_1_3_3_3_61_2","first-page":"143","volume-title":"2025 USENIX Annual Technical Conference (USENIX ATC 25)","author":"Zhao Hairui","year":"2025","unstructured":"Hairui Zhao, Qi Tian, Hongliang Li, and Zizhong Chen. 2025. { FlexPipe} : Maximizing Training Efficiency for Transformer-based Models with { Variable-Length} Inputs. In 2025 USENIX Annual Technical Conference (USENIX ATC 25). 143\u2013159."}],"event":{"name":"SC '25: The International Conference for High Performance Computing, Networking, Storage and Analysis","location":"St. Louis MO USA","acronym":"SC '25","sponsor":["SIGHPC ACM Special Interest Group on High Performance Computing, Special Interest Group on High Performance Computing"]},"container-title":["Proceedings of the International Conference for High Performance Computing, Networking, Storage and Analysis"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3712285.3759772","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2026,3,11]],"date-time":"2026-03-11T18:41:30Z","timestamp":1773254490000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3712285.3759772"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,11,15]]},"references-count":60,"alternative-id":["10.1145\/3712285.3759772","10.1145\/3712285"],"URL":"https:\/\/doi.org\/10.1145\/3712285.3759772","relation":{},"subject":[],"published":{"date-parts":[[2025,11,15]]},"assertion":[{"value":"2025-11-15","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}