# baladithyab
#
# Wave 5: full publication-materials drafts (pre-experimental release set)
#
# 639a760 12 days ago
#
# 3.54 kB

	# CITATION.cff — Citation File Format
	# https://citation-file-format.github.io/
	# Used by HF, GitHub, Zenodo to render a "Cite this repository" UI.

	cff-version: 1.2.0
	message: "If you use this framework or its derivative artifacts, please cite as below."
	type: software
	title: "Composer 2.5 Replication Framework: Methodology and Integration Architecture for Open Replication of Cursor's Agentic Coding Recipe"
	abstract: >
	An open-source methodology and integration architecture for replicating
	Cursor's Composer 2.5 recipe on a HuggingFace base model, plus a novel
	multi-teacher trace-replay distillation reward channel that complements
	the published SDPO/OPSD method (which Cursor's "Targeted RL with Textual
	Feedback" uses). Pre-experimental v0.0 release: methodology paper, audited
	recipe mapping, integration architecture across TRL/VeRL/OpenEnv,
	empirical economic-feasibility result for the novel channel ($0.98/trace),
	and a working code skeleton with 38 passing unit tests.

	authors:
	- family-names: "Codeseys"
	given-names: ""
	affiliation: "Independent researcher"
	# Replace with real ORCID if available:
	# orcid: "https://orcid.org/0000-0000-0000-0000"

	repository-code: "https://huggingface.co/Codeseys/composer-replication-framework"
	url: "https://huggingface.co/Codeseys/composer-replication-framework"
	date-released: "2026-05-25"
	version: "0.0.0"
	license: "MIT"

	keywords:
	- reinforcement-learning
	- post-training
	- distillation
	- agentic-coding
	- composer-2.5
	- cursor
	- kimi-k2
	- grpo
	- dapo
	- sdpo
	- opsd
	- trl
	- verl
	- openenv
	- llm

	# Primary upstream works this framework depends on / cites
	references:
	- type: article
	title: "Introducing Composer 2.5"
	authors:
	- name: "Cursor Team"
	year: 2026
	url: "https://cursor.com/blog/composer-2-5"

	- type: article
	title: "Self-Distilled Reasoner: On-Policy Self-Distillation for Large Language Models"
	authors:
	- family-names: "Zhao"
	given-names: "Siyan"
	- family-names: "Xie"
	given-names: "Zhihui"
	- family-names: "Liu"
	given-names: "Mengchen"
	- family-names: "Huang"
	given-names: "Jing"
	- family-names: "Pang"
	given-names: "Guan"
	- family-names: "Chen"
	given-names: "Feiyu"
	- family-names: "Grover"
	given-names: "Aditya"
	year: 2026
	url: "https://arxiv.org/abs/2601.18734"
	notes: "OPSD — single-LLM self-distillation; provides the reference loss implementation lifted by this framework."

	- type: article
	title: "Reinforcement Learning via Self-Distillation"
	authors:
	- family-names: "Hübotter"
	given-names: "Jonas"
	- family-names: "Lübeck"
	given-names: "Frederike"
	- family-names: "Behric"
	given-names: "Lejs"
	- family-names: "Baumann"
	given-names: "Anton"
	- family-names: "Bagatella"
	given-names: "Marco"
	- family-names: "Marta"
	given-names: "Daniel"
	- family-names: "Hakimi"
	given-names: "Ido"
	- family-names: "Shenfeld"
	given-names: "Idan"
	- family-names: "Buening"
	given-names: "Thomas Kleine"
	- family-names: "Guestrin"
	given-names: "Carlos"
	- family-names: "Krause"
	given-names: "Andreas"
	year: 2026
	url: "https://arxiv.org/abs/2601.20802"
	notes: "SDPO — formalizes the same mechanism as Cursor's Targeted RL with Textual Feedback. ICLR 2026 Scaling Post-training Workshop."