The future of AI depends on whether we can design next-generation hardware that better supports the scaling laws. At some point, AI architectures will even be influenced by the design decisions made in AI hardware. The co-design of AI and hardware will become the norm in the future.
Here are some considerations on AI hardware.
An architecture that accounts for activation outliers in LLM inference: OliVe (Guo et al., 2023).
@inproceedings{lutdla25,
  author    = {Li, Guoyu and Ye, Shengyu and Chen, Chunyun and Wang, Yang and Yang, Fan and Cao, Ting and Liu, Cheng and Sabry, Mohamed M. and Yang, Mao},
  title     = {{LUT-DLA}: Lookup Table as Efficient Extreme Low-Bit Deep Learning Accelerator},
  booktitle = {31st International Symposium on High-Performance Computer Architecture, {HPCA}},
  year      = {2025},
}
@article{waferllm25,
  author  = {He, Congjie and Huang, Yeqi and Mu, Pei and Miao, Ziming and Xue, Jilong and Ma, Lingxiao and Yang, Fan and Mai, Luo},
  title   = {{WaferLLM}: A Wafer-Scale {LLM} Inference System},
  journal = {ArXiv},
  year    = {2025},
}
@article{molutcore2024,
  author  = {Mo, Zhiwen and Wang, Lei and Wei, Jianyu and Zeng, Zhichen and Cao, Shijie and Ma, Lingxiao and Jing, Naifeng and Cao, Ting and Xue, Jilong and Yang, Fan and Yang, Mao},
  title   = {{LUT} Tensor Core: Lookup Table Enables Efficient Low-Bit {LLM} Inference Acceleration},
  journal = {ArXiv},
  year    = {2024},
}
@inproceedings{DBLP:conf/isca/0003THL00LG023,
  author    = {Guo, Cong and Tang, Jiaming and Hu, Weiming and Leng, Jingwen and Zhang, Chen and Yang, Fan and Liu, Yunxin and Guo, Minyi and Zhu, Yuhao},
  title     = {{OliVe}: Accelerating Large Language Models via Hardware-friendly Outlier-Victim Pair Quantization},
  booktitle = {Proceedings of the 50th Annual International Symposium on Computer Architecture, {ISCA}},
  year      = {2023},
}
Highlighted as an IEEE Micro Top Picks Honorable Mention in the July/August 2023 special issue of IEEE Micro.
@inproceedings{DBLP:conf/micro/00030LL0LG022,
  author    = {Guo, Cong and Zhang, Chen and Leng, Jingwen and Liu, Zihan and Yang, Fan and Liu, Yunxin and Guo, Minyi and Zhu, Yuhao},
  title     = {{ANT:} Exploiting Adaptive Numerical Data Type for Low-bit Deep Neural Network Quantization},
  booktitle = {55th {IEEE/ACM} International Symposium on Microarchitecture, {MICRO}},
  year      = {2022},
}