64 lines
1.6 KiB
BibTeX
64 lines
1.6 KiB
BibTeX
% METR blog post on long-task completion; the month is taken from the date in the URL path.
@misc{metr2025,
  author = {{METR}},
  title  = {Measuring {AI} Ability to Complete Long Tasks},
  year   = {2025},
  month  = mar,
  url    = {https://metr.org/blog/2025-03-19-measuring-ai-ability-to-complete-long-tasks/},
}
|
|
|
|
% Findings of ACL 2024 paper (proceedings, hence inproceedings + booktitle).
% TODO: replace the placeholder author with the paper's actual author list and
% verify the title against the ACL Anthology record. This reminder lives outside
% the entry because a percent sign does not start a comment inside an entry.
@inproceedings{acl2024,
  author    = {{Author(s) of ACL Paper}},
  title     = {{PowerPoint} Task Completion: Evaluating {LLMs} on Multi-Turn, Multi-Modal Instructions},
  booktitle = {Findings of the Association for Computational Linguistics: {ACL} 2024},
  year      = {2024},
  url       = {https://aclanthology.org/2024.findings-acl.514.pdf},
}
|
|
|
|
% Gorilla APIBench leaderboard web page. The author is a group name, so it is
% double-braced to stop BibTeX splitting it into first/last name parts.
@misc{gorilla,
  author = {{Gorilla Team}},
  title  = {Gorilla {APIBench} Leaderboard},
  year   = {2025},
  url    = {https://gorilla.cs.berkeley.edu/leaderboard.html},
}
|
|
|
|
% Aider leaderboards web page. The author is a group name, so it is
% double-braced to stop BibTeX splitting it into first/last name parts.
@misc{aider,
  author = {{Aider Team}},
  title  = {{Aider.chat} Leaderboards},
  year   = {2025},
  url    = {https://aider.chat/docs/leaderboards/},
}
|
|
|
|
% Metaculus question listing filtered to the AI topic (see the URL query string).
@misc{metaculus,
  author = {{Metaculus}},
  title  = {{Metaculus}: {AI} Prediction Markets},
  year   = {2025},
  url    = {https://www.metaculus.com/questions/?topic=ai},
}
|
|
|
|
% PredictIt site home page. The camel-case name is braced in the title so
% sentence-casing styles do not lowercase it.
@misc{predictit,
  author = {{PredictIt}},
  title  = {{PredictIt} Markets},
  year   = {2025},
  url    = {https://www.predictit.org/},
}
|
|
% Polymarket market listing for the AI category (see the URL path).
@misc{polymarket,
  author = {{Polymarket}},
  title  = {{Polymarket}: {AI} Markets},
  year   = {2025},
  url    = {https://polymarket.com/markets/all/ai},
}
|
|
|
|
% International AI Safety Report, hosted on arXiv. The eprint identifier is
% copied from the URL so arXiv-aware styles can render it.
% NOTE(review): the author is a group placeholder; consider replacing it with
% the report's named authors once confirmed.
@techreport{aisafety,
  author        = {{International AI Safety Report Team}},
  title         = {International {AI} Safety Report},
  institution   = {International AI Safety Report},
  year          = {2025},
  eprint        = {2501.17805},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2501.17805},
}
|
|
|
|
% TrackingAI.org home page. The author is a group name, so it is
% double-braced to stop BibTeX splitting it into first/last name parts.
@misc{trackingai,
  author = {{TrackingAI Team}},
  title  = {{TrackingAI.org}},
  year   = {2025},
  url    = {https://trackingai.org/home},
}
|