% Bibliography database: AI capability benchmarks, leaderboards, forecasting
% platforms, and reports.
%
% Conventions used in this file:
%   - One field per line, braces as value delimiters, trailing comma kept.
%   - Organisational authors are double-braced so they are treated as one
%     indivisible name instead of being split into first/last parts.
%   - Acronyms and proper nouns in titles are braced so sentence-casing
%     styles do not lowercase them.
%   - Free-text notes like this one live OUTSIDE entries only; BibTeX has no
%     comment character inside an entry body.

% METR blog post (web page; no DOI available in the source data).
@misc{metr2025,
  author = {{METR}},
  title  = {Measuring {AI} Ability to Complete Long Tasks},
  year   = {2025},
  url    = {https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/},
}

% TODO: the author field below is still a placeholder. Replace it with the
% actual author(s) listed on the ACL Anthology page, written as
% "Last, First and Last, First", and drop the extra pair of braces.
% Findings of ACL is a proceedings volume, hence the entry type and booktitle.
@inproceedings{acl2024,
  author    = {{Author(s) of ACL Paper}},
  title     = {{PowerPoint} Task Completion: Evaluating {LLMs} on Multi-Turn, Multi-Modal Instructions},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2024},
  year      = {2024},
  url       = {https://aclanthology.org/2024.findings-acl.514.pdf},
}

% Leaderboard web pages.
@misc{gorilla,
  author = {{Gorilla Team}},
  title  = {{Gorilla} {APIBench} Leaderboard},
  year   = {2025},
  url    = {https://gorilla.cs.berkeley.edu/leaderboard.html},
}

@misc{aider,
  author = {{Aider Team}},
  title  = {{Aider.chat} Leaderboards},
  year   = {2025},
  url    = {https://aider.chat/docs/leaderboards/},
}

% Forecasting and prediction-market web sites.
@misc{metaculus,
  author = {{Metaculus}},
  title  = {{Metaculus}: {AI} Prediction Markets},
  year   = {2025},
  url    = {https://www.metaculus.com/questions/?topic=ai},
}

@misc{predictit,
  author = {{PredictIt}},
  title  = {{PredictIt} Markets},
  year   = {2025},
  url    = {https://www.predictit.org/},
}

@misc{polymarket,
  author = {{Polymarket}},
  title  = {{Polymarket}: {AI} Markets},
  year   = {2025},
  url    = {https://polymarket.com/markets/all/ai},
}

% Report hosted on arXiv; the eprint identifier is taken from the URL.
% NOTE(review): author and institution are generic team names as originally
% entered; confirm against the report's own citation guidance.
@techreport{aisafety,
  author        = {{International AI Safety Report Team}},
  title         = {International {AI} Safety Report},
  institution   = {International AI Safety Report},
  year          = {2025},
  eprint        = {2501.17805},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.17805},
}

@misc{trackingai,
  author = {{TrackingAI Team}},
  title  = {{TrackingAI.org}},
  year   = {2025},
  url    = {https://trackingai.org/home},
}