@inproceedings{chen2025octopus,
  author    = {Chen, Wei and Li, Zhiyuan and Ma, Mingyuan},
  title     = {Octopus: On-device language model for function calling of software {APIs}},
  booktitle = {Proceedings of the 2025 Conference of the North American Chapter of the Association for Computational Linguistics: Industry Track},
  pages     = {329--339},
  year      = {2025},
}
EMAS
Octo-planner: On-device Language Model for Planner-Action Agents
Wei Chen, Zhiyuan Li, Zhen Guo, and 1 more author
In Proceedings of the Workshop on Empowering Multi-Agent Systems (EMAS 2025), 2025
@inproceedings{chen2025octoplanner,
  author    = {Chen, Wei and Li, Zhiyuan and Guo, Zhen and Shen, Yikang},
  title     = {Octo-planner: On-device Language Model for Planner-Action Agents},
  booktitle = {Proceedings of the Workshop on Empowering Multi-Agent Systems (EMAS 2025)},
  year      = {2025},
}
ICDM
DP-FedLoRA: Privacy-Enhanced Federated Fine-Tuning for On-Device Large Language Models
Honghui Xu, Shiva Shrestha, Wei Chen, and 2 more authors
In IEEE International Conference on Data Mining (ICDM), 2025
@inproceedings{xu2025dpfedlora,
  author    = {Xu, Honghui and Shrestha, Shiva and Chen, Wei and Li, Zhiyuan and Cai, Zhipeng},
  title     = {{DP-FedLoRA}: Privacy-Enhanced Federated Fine-Tuning for On-Device Large Language Models},
  booktitle = {IEEE International Conference on Data Mining (ICDM)},
  year      = {2025},
  note      = {Best Paper Runner-Up Award},
}
2024
arXiv
AutoNeural: Co-Designing Vision-Language Models for NPU Inference
Wei Chen, Liangmin Wu, Yunhai Hu, and 8 more authors
% AutoNeural preprint. The arXiv identifier 2512.02924 follows the YYMM.number
% scheme, i.e. a December 2025 submission, so the year field is 2025. The
% citation key keeps its legacy "2024" so that existing citations still resolve.
@article{chen2024autoneural,
  author  = {Chen, Wei and Wu, Liangmin and Hu, Yunhai and Li, Zhiyuan and Cheng, Zhiyuan and Qian, Yicheng and Zhu, Lingyue and Hu, Zhipeng and Liang, Luoyi and Tang, Qiang and others},
  title   = {{AutoNeural}: Co-Designing Vision-Language Models for {NPU} Inference},
  journal = {arXiv preprint arXiv:2512.02924},
  year    = {2025},
}
arXiv
OmniVLM: A Token-Compressed, Sub-Billion-Parameter Vision-Language Model for Efficient On-Device Inference
@article{chen2024omnivlm,
  author  = {Chen, Wei and Li, Zhiyuan and Xin, Shuo},
  title   = {{OmniVLM}: A Token-Compressed, Sub-Billion-Parameter Vision-Language Model for Efficient On-Device Inference},
  journal = {arXiv preprint arXiv:2412.11475},
  year    = {2024},
}
arXiv
Dolphin: Long Context as a New Modality for Energy-Efficient On-Device Language Models
@article{chen2024dolphin,
  author  = {Chen, Wei and Li, Zhiyuan and Xin, Shuo and Wang, Yihao},
  title   = {Dolphin: Long Context as a New Modality for Energy-Efficient On-Device Language Models},
  journal = {arXiv preprint arXiv:2408.15518},
  year    = {2024},
}
@article{chen2024octopusv4,
  author  = {Chen, Wei and Li, Zhiyuan},
  title   = {Octopus v4: Graph of language models},
  journal = {arXiv preprint arXiv:2404.19296},
  year    = {2024},
}
arXiv
Octopus v3: Technical Report for On-device Sub-billion Multimodal AI Agent
@article{chen2024octopusv2,
  author  = {Chen, Wei and Li, Zhiyuan},
  title   = {Octopus v2: On-device language model for super agent},
  journal = {arXiv preprint arXiv:2404.01744},
  year    = {2024},
}
arXiv
On-Device Language Models: A Comprehensive Review
Jiajun Xu, Zhiyuan Li, Wei Chen, and 4 more authors
@article{xu2024ondevicereview,
  author  = {Xu, Jiajun and Li, Zhiyuan and Chen, Wei and Wang, Qun and Gao, Xin and Cai, Qi and Ling, Ziyuan},
  title   = {On-Device Language Models: A Comprehensive Review},
  journal = {arXiv preprint arXiv:2409.00088},
  year    = {2024},
}