Publications

@inproceedings{miao26flexllm,
  author    = {Oliaro, Gabriele and Miao, Xupeng and Cheng, Xinhao and Kada, Vineeth and Gao, Ruohan and Huang, Yingyi and Delacourt, Remi and Yang, April and Wang, Yingcheng and Wu, Mengdi and Unger, Colin and Jia, Zhihao},
  title     = {{FlexLLM}: Token-Level Co-Serving of {LLM} Inference and Fine-Tuning with {SLO} Guarantees},
  booktitle = {Proceedings of NSDI Conference},
  year      = {2026},
  cofirst   = {true},
}

EuroSys

AdaServe: Accelerating Multi-SLO LLM Serving with SLO-Customized Speculative Decoding

Zikun Li, Zhuofu Chen, Remi Delacourt, Gabriele Oliaro and 10 more authors

Proceedings of EuroSys Conference 2026

arXiv Bib CCF-A

@inproceedings{li26adaserve,
  author    = {Li, Zikun and Chen, Zhuofu and Delacourt, Remi and Oliaro, Gabriele and Wang, Zeyu and Chen, Qinghan and Lin, Shuhuai and Yang, April and Zhang, Zhihao and Chen, Zhuoming and Lai, Sean and Cheng, Xinhao and Miao, Xupeng and Jia, Zhihao},
  title     = {{AdaServe}: Accelerating Multi-{SLO} {LLM} Serving with {SLO}-Customized Speculative Decoding},
  booktitle = {Proceedings of EuroSys Conference},
  year      = {2026},
}

CSUR

Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 3 more authors

ACM Computing Surveys 2026

arXiv Bib

@article{miao26serving,
  author  = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Jin, Hongyi and Chen, Tianqi and Jia, Zhihao},
  title   = {Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems},
  journal = {ACM Computing Surveys},
  year    = {2026},
}

2025

OSDI

Mirage: A Multi-Level Superoptimizer for Tensor Programs

Mengdi Wu, Xinhao Cheng, Shengyu Liu, Chunan Shi and 6 more authors

Proceedings of OSDI Conference 2025

arXiv Bib CCF-A

@inproceedings{wang25mirage,
  author    = {Wu, Mengdi and Cheng, Xinhao and Liu, Shengyu and Shi, Chunan and Ji, Jianan and Ao, Kit and Velliengiri, Praveen and Miao, Xupeng and Padan, Oded and Jia, Zhihao},
  title     = {{Mirage}: A Multi-Level Superoptimizer for Tensor Programs},
  booktitle = {Proceedings of OSDI Conference},
  year      = {2025},
}

ASPLOS

Helix: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous GPUs

Yixuan Mei, Yonghao Zhuang, Xupeng Miao, Juncheng Yang and 2 more authors

Proceedings of ASPLOS Conference 2025

arXiv Bib CCF-A

@inproceedings{mei25helix,
  author    = {Mei, Yixuan and Zhuang, Yonghao and Miao, Xupeng and Yang, Juncheng and Jia, Zhihao and Vinayak, Rashmi},
  title     = {{Helix}: Distributed Serving of Large Language Models via Max-Flow on Heterogeneous {GPUs}},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

ASPLOS

GraphPipe: Improving Performance and Scalability of DNN Training with Graph Pipeline Parallelism

Byungsoo Jeon, Mengdi Wu, Shiyi Cao, Sunghyun Kim and 10 more authors

Proceedings of ASPLOS Conference 2025

arXiv Bib CCF-A

@inproceedings{jeon25graphpipe,
  author    = {Jeon, Byungsoo and Wu, Mengdi and Cao, Shiyi and Kim, Sunghyun and Park, Sunghyun and Aggarwal, Neeraj and Unger, Colin and Arfeen, Daiyaan and Liao, Peiyuan and Miao, Xupeng and Alizadeh, Mohammad and Ganger, Gregory R. and Chen, Tianqi and Jia, Zhihao},
  title     = {{GraphPipe}: Improving Performance and Scalability of {DNN} Training with Graph Pipeline Parallelism},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

ASPLOS

Spindle: Efficient Distributed Training of Multi-Task Large Models via Wavefront Scheduling

Yujie Wang, Shenhan Zhu, Fangcheng Fu, Xupeng Miao and 5 more authors

Proceedings of ASPLOS Conference 2025

arXiv Bib CCF-A

@inproceedings{wang25spindle,
  author    = {Wang, Yujie and Zhu, Shenhan and Fu, Fangcheng and Miao, Xupeng and Zhang, Jie and Zhu, Juan and Hong, Fan and Li, Yong and Cui, Bin},
  title     = {{Spindle}: Efficient Distributed Training of Multi-Task Large Models via Wavefront Scheduling},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2025},
}

SIGMOD

PQCache: Product Quantization-based KVCache for Long Context LLM Inference

Hailin Zhang, Xiaodong Ji, Yilin Chen, Fangcheng Fu and 4 more authors

Proceedings of SIGMOD Conference 2025

arXiv Bib CCF-A

@inproceedings{zhang25pqcache,
  author    = {Zhang, Hailin and Ji, Xiaodong and Chen, Yilin and Fu, Fangcheng and Miao, Xupeng and Nie, Xiaonan and Chen, Weipeng and Cui, Bin},
  title     = {{PQCache}: Product Quantization-based {KVCache} for Long Context {LLM} Inference},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2025},
}

ICLR

NetMoE: Accelerating MoE Training through Dynamic Sample Placement (Spotlight)

Xinyi Liu, Yujie Wang, Fangcheng Fu, Xupeng Miao and 3 more authors

Proceedings of ICLR Conference 2025

Bib

@inproceedings{liu2025netmoe,
  author    = {Liu, Xinyi and Wang, Yujie and Fu, Fangcheng and Miao, Xupeng and Zhu, Shenhan and Nie, Xiaonan and Cui, Bin},
  title     = {{NetMoE}: Accelerating {MoE} Training through Dynamic Sample Placement},
  booktitle = {Proceedings of ICLR Conference},
  year      = {2025},
}

ICML

Demystifying Cost-Efficiency in LLM Serving over Heterogeneous GPUs

Youhe Jiang, Fangcheng Fu, Xiaozhe Yao, Guoliang He and 5 more authors

Proceedings of ICML Conference 2025

Bib CCF-A

@inproceedings{jiang2025demy,
  author    = {Jiang, Youhe and Fu, Fangcheng and Yao, Xiaozhe and He, Guoliang and Miao, Xupeng and Klimovic, Ana and Cui, Bin and Yuan, Binhang and Yoneki, Eiko},
  title     = {Demystifying Cost-Efficiency in {LLM} Serving over Heterogeneous {GPUs}},
  booktitle = {Proceedings of ICML Conference},
  year      = {2025},
}

VLDBJ

Efficient and Scalable Huge Embedding Model Training via Distributed Cache Management (Special Issue on Best Papers of VLDB 2022)

Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 4 more authors

The VLDB Journal 2025

Bib PDF CCF-A

@article{miao2025het,
  author  = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Jiang, Jie and Cui, Bin},
  title   = {Efficient and Scalable Huge Embedding Model Training via Distributed Cache Management},
  journal = {The VLDB Journal},
  year    = {2025},
}

2024

NeurIPS

LSH-MoE: Communication-efficient MoE Training via Locality-Sensitive Hashing

Xiaonan Nie, Qibin Liu, Fangcheng Fu, Shenhan Zhu and 5 more authors

Proceedings of NeurIPS Conference 2024

arXiv Bib CCF-A

@inproceedings{nie2024lshmoe,
  author    = {Nie, Xiaonan and Liu, Qibin and Fu, Fangcheng and Zhu, Shenhan and Miao, Xupeng and Li, Xiaoyang and Zhang, Yang and Liu, Shouda and Cui, Bin},
  title     = {{LSH-MoE}: Communication-efficient {MoE} Training via Locality-Sensitive Hashing},
  booktitle = {Proceedings of NeurIPS Conference},
  year      = {2024},
}

SOSP

Enabling Parallelism Hot Switching for Efficient Training of Large Language Models

Hao Ge, Fangcheng Fu, Haoyang Li, Xuanyu Wang and 6 more authors

Proceedings of SOSP Conference 2024

Bib PDF CCF-A

@inproceedings{ge24hotspa,
  author    = {Ge, Hao and Fu, Fangcheng and Li, Haoyang and Wang, Xuanyu and Lin, Sheng and Wang, Yujie and Nie, Xiaonan and Zhang, Hailin and Miao, Xupeng and Cui, Bin},
  title     = {Enabling Parallelism Hot Switching for Efficient Training of Large Language Models},
  booktitle = {Proceedings of SOSP Conference},
  year      = {2024},
  publisher = {{ACM}},
}

SC

Atlas: Hierarchical Partitioning for Quantum Circuit Simulation on GPUs

Mingkuan Xu, Shiyi Cao, Xupeng Miao, Umut Acar and 1 more author

Proceedings of SC Conference 2024

arXiv Bib CCF-A

@inproceedings{xu24atlas,
  author    = {Xu, Mingkuan and Cao, Shiyi and Miao, Xupeng and Acar, Umut and Jia, Zhihao},
  title     = {{Atlas}: Hierarchical Partitioning for Quantum Circuit Simulation on {GPUs}},
  booktitle = {Proceedings of SC Conference},
  year      = {2024},
}

SIGMOD

Demystifying Data Management for Large Language Models (Tutorial)

Xupeng Miao, Zhihao Jia, and Bin Cui

Proceedings of SIGMOD Conference 2024

Bib PDF CCF-A

@inproceedings{miao24dm4llm,
  author    = {Miao, Xupeng and Jia, Zhihao and Cui, Bin},
  title     = {Demystifying Data Management for Large Language Models},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2024},
  publisher = {{ACM}},
}

ASPLOS

SpotServe: Serving Generative Large Language Models on Preemptible Instances (Distinguished Artifact Award), (IEEE Micro Top Picks Honorable Mention)

Xupeng Miao, Chunan Shi, Jiangfei Duan, Xiaoli Xi and 3 more authors

Proceedings of ASPLOS Conference 2024

arXiv Bib CCF-A Available & Functional & Reproduced
@inproceedings{miao24spotserve,
  author    = {Miao, Xupeng and Shi, Chunan and Duan, Jiangfei and Xi, Xiaoli and Lin, Dahua and Cui, Bin and Jia, Zhihao},
  title     = {{SpotServe}: Serving Generative Large Language Models on Preemptible Instances},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
}

ASPLOS

SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 10 more authors

Proceedings of ASPLOS Conference 2024

arXiv Bib CCF-A

@inproceedings{miao23specinfer,
  author    = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Wang, Zeyu and Wong, Rae Ying Yee and Zhu, Alan and Yang, Lijie and Shi, Xiaoxiang and Shi, Chunan and Chen, Zhuoming and Arfeen, Daiyaan and Abhyankar, Reyna and Jia, Zhihao},
  title     = {{SpecInfer}: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
  doi       = {10.48550/arXiv.2305.09781},
}

ASPLOS

Optimal Kernel Orchestration for Tensor Programs with Korch

Muyan Hu, Ashwin Venkatram, Shreyashri Biswas, Balamurugan Marimuthu and 7 more authors

Proceedings of ASPLOS Conference 2024

arXiv Bib CCF-A

@inproceedings{hu24korch,
  author    = {Hu, Muyan and Venkatram, Ashwin and Biswas, Shreyashri and Marimuthu, Balamurugan and Hou, Bohan and Oliaro, Gabriele and Wang, Haojie and Zheng, Liyan and Miao, Xupeng and Zhai, Jidong and Jia, Zhihao},
  title     = {Optimal Kernel Orchestration for Tensor Programs with {Korch}},
  booktitle = {Proceedings of ASPLOS Conference},
  year      = {2024},
}

NSDI

Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances

Jiangfei Duan¹, Ziang Song¹, Xupeng Miao¹, Xiaoli Xi and 4 more authors

Proceedings of NSDI Conference 2024

arXiv Bib CCF-A

@inproceedings{nsdi24parcae,
  author    = {Duan, Jiangfei and Song, Ziang and Miao, Xupeng and Xi, Xiaoli and Lin, Dahua and Xu, Harry and Zhang, Minjia and Jia, Zhihao},
  title     = {{Parcae}: Proactive, Liveput-Optimized {DNN} Training on Preemptible Instances},
  booktitle = {Proceedings of NSDI Conference},
  year      = {2024},
  cofirst   = {true},
}

ACL

Quantized Side Tuning: Fast and Memory-Efficient Tuning of Quantized Large Language Models (Outstanding Paper Award)

Zhengxin Zhang, Dan Zhao, Xupeng Miao, Gabriele Oliaro and 3 more authors

Proceedings of ACL Conference 2024

arXiv Bib CCF-A

@inproceedings{acl24qst,
  author    = {Zhang, Zhengxin and Zhao, Dan and Miao, Xupeng and Oliaro, Gabriele and Li, Qing and Jiang, Yong and Jia, Zhihao},
  title     = {Quantized Side Tuning: Fast and Memory-Efficient Tuning of Quantized Large Language Models},
  booktitle = {Proceedings of ACL Conference},
  year      = {2024},
}

IJCAI

X-former Elucidator: Reviving Efficient Attention for Long Context Language Modeling

Xupeng Miao, Shenhan Zhu, Fangcheng Fu, Ziyu Guo and 4 more authors

Proceedings of IJCAI Conference 2024

Bib PDF CCF-A

@inproceedings{ijcai24xformer,
  author    = {Miao, Xupeng and Zhu, Shenhan and Fu, Fangcheng and Guo, Ziyu and Yang, Zhi and Tu, Yaofeng and Jia, Zhihao and Cui, Bin},
  title     = {{X-former} Elucidator: Reviving Efficient Attention for Long Context Language Modeling},
  booktitle = {Proceedings of IJCAI Conference},
  year      = {2024},
}

VLDB

Experimental Analysis of Large-scale Learnable Vector Storage Compression

Hailin Zhang, Penghao Zhao, Xupeng Miao, Yingxia Shao and 3 more authors

Proc. VLDB Endow. 2024

arXiv Bib CCF-A

@article{vldb24eazhang,
  author  = {Zhang, Hailin and Zhao, Penghao and Miao, Xupeng and Shao, Yingxia and Liu, Zirui and Yang, Tong and Cui, Bin},
  title   = {Experimental Analysis of Large-scale Learnable Vector Storage Compression},
  journal = {Proc. {VLDB} Endow.},
  year    = {2024},
}

ICDE

MFIX: An Efficient and Reliable Index Advisor via Multi-Fidelity Bayesian Optimization

Zhuo Chang, Xinyi Zhang, Yang Li, Xupeng Miao and 2 more authors

Proceedings of ICDE Conference 2024

Bib CCF-A

@inproceedings{icde24mfix,
  author    = {Chang, Zhuo and Zhang, Xinyi and Li, Yang and Miao, Xupeng and Qin, Yanzhao and Cui, Bin},
  title     = {{MFIX}: An Efficient and Reliable Index Advisor via Multi-Fidelity {Bayesian} Optimization},
  booktitle = {Proceedings of ICDE Conference},
  year      = {2024},
}

TKDE

Improving Automatic Parallel Training via Balanced Memory Workload Optimization

Yujie Wang, Youhe Jiang, Xupeng Miao, Fangcheng Fu and 4 more authors

IEEE Transactions on Knowledge and Data Engineering 2024

arXiv Bib CCF-A

@article{wang2024galvatronbmw,
  author    = {Wang, Yujie and Jiang, Youhe and Miao, Xupeng and Fu, Fangcheng and Zhu, Shenhan and Nie, Xiaonan and Tu, Yaofeng and Cui, Bin},
  title     = {Improving Automatic Parallel Training via Balanced Memory Workload Optimization},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  year      = {2024},
  publisher = {IEEE},
  doi       = {10.1109/TKDE.2024.3370614},
}

AAAI

Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference

Zihao Yu, Haoyang Li, Fangcheng Fu, Xupeng Miao and 1 more author

In Proceedings of AAAI Conference 2024

arXiv Bib CCF-A

@inproceedings{aaai24fisedit,
  author    = {Yu, Zihao and Li, Haoyang and Fu, Fangcheng and Miao, Xupeng and Cui, Bin},
  title     = {Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference},
  booktitle = {Proceedings of AAAI Conference},
  year      = {2024},
}

EACL

Generative Dense Retrieval: Memory Can Be a Burden

Peiwen Yuan, Xinglin Wang, Shaoxiong Feng, Boyuan Pan and 4 more authors

Proceedings of EACL Conference 2024

arXiv Bib

@inproceedings{eacl24yuan,
  author    = {Yuan, Peiwen and Wang, Xinglin and Feng, Shaoxiong and Pan, Boyuan and Li, Yiwei and Wang, Heda and Miao, Xupeng and Li, Kan},
  title     = {Generative Dense Retrieval: Memory Can Be a Burden},
  booktitle = {Proceedings of EACL Conference},
  year      = {2024},
}

CSUR

Distributed Graph Neural Network Training: A Survey

Yingxia Shao, Hongzheng Li, Xizhi Gu, Hongbo Yin and 5 more authors

ACM Computing Surveys 2024

arXiv Bib

@article{shao2022distributed,
  author  = {Shao, Yingxia and Li, Hongzheng and Gu, Xizhi and Yin, Hongbo and Li, Yawen and Miao, Xupeng and Zhang, Wentao and Cui, Bin and Chen, Lei},
  title   = {Distributed Graph Neural Network Training: A Survey},
  journal = {ACM Computing Surveys},
  year    = {2024},
  doi     = {10.48550/arXiv.2211.00216},
}

2023

arXiv

Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems

Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 3 more authors

arXiv preprint arXiv:2312.15234 2023

arXiv Bib

@misc{miao23efficient,
  author        = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Jin, Hongyi and Chen, Tianqi and Jia, Zhihao},
  title         = {Towards Efficient Generative Large Language Model Serving: A Survey from Algorithms to Systems},
  howpublished  = {arXiv preprint},
  eprint        = {2312.15234},
  archiveprefix = {arXiv},
  year          = {2023},
  doi           = {10.48550/arXiv.2312.15234},
}

OSDI

EinNet: Optimizing Tensor Programs with Derivation-Based Transformations

Liyan Zheng, Haojie Wang, Jidong Zhai, Muyan Hu and 7 more authors

Proceedings of OSDI Conference 2023

Bib CCF-A

@inproceedings{osdi23einnet,
  author    = {Zheng, Liyan and Wang, Haojie and Zhai, Jidong and Hu, Muyan and Ma, Zixuan and Wang, Tuowei and Huang, Shuhong and Miao, Xupeng and Tang, Shizhi and Huang, Kezhao and Jia, Zhihao},
  title     = {{EinNet}: Optimizing Tensor Programs with Derivation-Based Transformations},
  booktitle = {Proceedings of OSDI Conference},
  year      = {2023},
}

VLDB

SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training

Xupeng Miao, Yining Shi, Zhi Yang, Bin Cui and 1 more author

Proc. VLDB Endow. 2023

Bib CCF-A

@article{miao2023sdpipe,
  author    = {Miao, Xupeng and Shi, Yining and Yang, Zhi and Cui, Bin and Jia, Zhihao},
  title     = {{SDPipe}: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {16},
  year      = {2023},
  publisher = {VLDB Endowment},
}

VLDB

Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism

Xupeng Miao, Yujie Wang, Youhe Jiang, Chunan Shi and 3 more authors

Proc. VLDB Endow. 2023

Bib PDF CCF-A

@article{miao2023galvatron,
  author    = {Miao, Xupeng and Wang, Yujie and Jiang, Youhe and Shi, Chunan and Nie, Xiaonan and Zhang, Hailin and Cui, Bin},
  title     = {{Galvatron}: Efficient Transformer Training over Multiple {GPUs} Using Automatic Parallelism},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {16},
  number    = {3},
  pages     = {470--479},
  year      = {2023},
  publisher = {VLDB Endowment},
  doi       = {10.14778/3570690.3570697},
}

VLDB

Angel-PTM: A Scalable and Economical Large-scale Pre-training System in Tencent

Xiaonan Nie, Yi Liu, Fangcheng Fu, Jinbao Xue and 4 more authors

Proc. VLDB Endow. (Industry) 2023

arXiv Bib CCF-A

@article{DBLP:journals/corr/abs-2303-02868,
  author  = {Nie, Xiaonan and Liu, Yi and Fu, Fangcheng and Xue, Jinbao and Jiao, Dian and Miao, Xupeng and Tao, Yangyu and Cui, Bin},
  title   = {{Angel-PTM}: A Scalable and Economical Large-scale Pre-training System in {Tencent}},
  journal = {Proc. {VLDB} Endow. (Industry)},
  year    = {2023},
  doi     = {10.48550/arXiv.2303.02868},
}

SIGMOD

FlexMoE: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement

Xiaonan Nie, Xupeng Miao, Zilong Wang, Jilong Xue and 4 more authors

Proceedings of SIGMOD Conference 2023

Bib CCF-A

@inproceedings{nie2023flexmoe,
  author    = {Nie, Xiaonan and Miao, Xupeng and Wang, Zilong and Xue, Jilong and Ma, Lingxiao and Yang, Zichao and Cao, Gang and Cui, Bin},
  title     = {{FlexMoE}: Scaling Large-scale Sparse Pre-trained Model Training via Dynamic Device Placement},
  booktitle = {Proceedings of SIGMOD Conference},
  year      = {2023},
  publisher = {{ACM}},
}

IJCAI

OSDP: Optimal Sharded Data Parallel for Distributed Deep Learning

Youhe Jiang, Fangcheng Fu, Xupeng Miao, Xiaonan Nie and 1 more author

Proceedings of IJCAI Conference 2023

arXiv Bib CCF-A

@inproceedings{jiang2023osdp,
  author    = {Jiang, Youhe and Fu, Fangcheng and Miao, Xupeng and Nie, Xiaonan and Cui, Bin},
  title     = {{OSDP}: Optimal Sharded Data Parallel for Distributed Deep Learning},
  booktitle = {Proceedings of IJCAI Conference},
  year      = {2023},
}

NeurIPS

Model-enhanced Vector Index

Hailin Zhang, Yujing Wang, Qi Chen, Ruiheng Chang and 15 more authors

Proceedings of NeurIPS Conference 2023

arXiv Bib CCF-A

@inproceedings{zhang2023mevi,
  author    = {Zhang, Hailin and Wang, Yujing and Chen, Qi and Chang, Ruiheng and Zhang, Ting and Miao, Ziming and Hou, Yingyan and Ding, Yang and Miao, Xupeng and Wang, Haonan and Pang, Bochen and Zhan, Yuefeng and Sun, Hao and Deng, Weiwei and Zhang, Qi and Yang, Fan and Xie, Xing and Yang, Mao and Cui, Bin},
  title     = {Model-enhanced Vector Index},
  booktitle = {Proceedings of NeurIPS Conference},
  year      = {2023},
}

NeurIPS

Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference

Zihao Yu, Haoyang Li, Fangcheng Fu, Xupeng Miao and 1 more author

In Proceedings of NeurIPS ML for Systems (MLSys) Workshop 2023

arXiv Bib HTML

@inproceedings{fisedit2023,
  author    = {Yu, Zihao and Li, Haoyang and Fu, Fangcheng and Miao, Xupeng and Cui, Bin},
  title     = {Accelerating Text-to-image Editing via Cache-enabled Sparse Diffusion Inference},
  booktitle = {Proceedings of NeurIPS ML for Systems (MLSys) Workshop},
  year      = {2023},
}

AAAI

CALIP: Zero-Shot Enhancement of CLIP with Parameter-free Attention

Ziyu Guo, Renrui Zhang, Longtian Qiu, Xianzheng Ma and 3 more authors

Proceedings of AAAI Conference 2023

arXiv Bib CCF-A

@inproceedings{guo2023calip,
  author    = {Guo, Ziyu and Zhang, Renrui and Qiu, Longtian and Ma, Xianzheng and Miao, Xupeng and He, Xuming and Cui, Bin},
  title     = {{CALIP}: Zero-Shot Enhancement of {CLIP} with Parameter-free Attention},
  booktitle = {Proceedings of AAAI Conference},
  year      = {2023},
  publisher = {{AAAI}},
}

2022

SCIS

Hetu: A highly efficient automatic parallel distributed deep learning system

Xupeng Miao, Xiaonan Nie, Hailin Zhang, Tong Zhao and 1 more author

Sci. China Inf. Sci. 2022

Bib PDF CCF-T1

@article{DBLP:journals/chinaf/MiaoXP22,
  author  = {Miao, Xupeng and Nie, Xiaonan and Zhang, Hailin and Zhao, Tong and Cui, Bin},
  title   = {{Hetu}: A highly efficient automatic parallel distributed deep learning system},
  journal = {Sci. China Inf. Sci.},
  year    = {2022},
  doi     = {10.1007/s11432-022-3581-9},
  url     = {http://engine.scichina.com/doi/10.1007/s11432-022-3581-9},
}

VLDB

HET: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework (Best Scalable Data Science Paper Award)

Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 3 more authors

Proc. VLDB Endow. 2022

Bib PDF CCF-A

@article{miao2021het,
  author    = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Cui, Bin},
  title     = {{HET}: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework},
  journal   = {Proc. {VLDB} Endow.},
  volume    = {15},
  number    = {2},
  pages     = {312--320},
  year      = {2022},
  publisher = {VLDB Endowment},
}

VLDB

Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates

Fangcheng Fu, Xupeng Miao, Jiawei Jiang, Huanran Xue and 1 more author

Proc. VLDB Endow. 2022

arXiv Bib CCF-A

@article{DBLP:journals/corr/abs-2207-14628,
  author  = {Fu, Fangcheng and Miao, Xupeng and Jiang, Jiawei and Xue, Huanran and Cui, Bin},
  title   = {Towards Communication-efficient Vertical Federated Learning Training via Cache-enabled Local Updates},
  journal = {Proc. {VLDB} Endow.},
  year    = {2022},
  doi     = {10.48550/arXiv.2207.14628},
}

SIGMOD

HET-GMP: A Graph-based System Approach to Scaling Large Embedding Model Training

Xupeng Miao, Yining Shi, Hailin Zhang, Xin Zhang and 3 more authors

In Proceedings of SIGMOD Conference 2022

Bib PDF CCF-A