% Auto-generated by scripts/merge-refs.js — do not edit by hand.
% Source: epagogy/papers/ml/{grammar,landscape,shortpath}/refs.bib

% Dwork et al. (2015), Science. The DOI is given as its own field (value taken
% from the publisher URL) so DOI-aware styles can link it; the URL is kept.
@article{dwork2015reusable,
  title   = {The Reusable Holdout: Preserving Validity in Adaptive Data Analysis},
  author  = {Dwork, Cynthia and Feldman, Vitaly and Hardt, Moritz and Pitassi, Toniann and Reingold, Omer and Roth, Aaron},
  journal = {Science},
  volume  = {349},
  number  = {6248},
  pages   = {636--638},
  year    = {2015},
  doi     = {10.1126/science.aaa9375},
  url     = {https://www.science.org/doi/10.1126/science.aaa9375}
}

% Blum and Hardt (2015), ICML proceedings; located via the arXiv URL.
@inproceedings{blum2015ladder,
  author    = {Blum, Avrim and Hardt, Moritz},
  title     = {The Ladder: A Reliable Leaderboard for Machine Learning Competitions},
  year      = {2015},
  booktitle = {Proceedings of the 32nd International Conference on Machine Learning (ICML)},
  pages     = {1006--1014},
  url       = {https://arxiv.org/abs/1502.04585}
}

% Ballarin et al. (2024), International Journal of Forecasting.
@article{ballarin2024reservoir,
  author  = {Ballarin, Giovanni and Dellaportas, Petros and Grigoryeva, Lyudmila and Hirt, Marcel and {van Huellen}, Sophie and Ortega, Juan-Pablo},
  title   = {Reservoir Computing for Macroeconomic Forecasting with Mixed-Frequency Data},
  year    = {2024},
  journal = {International Journal of Forecasting},
  volume  = {40},
  number  = {3},
  pages   = {1206--1237},
  doi     = {10.1016/j.ijforecast.2023.10.009}
}

% Tampu et al. (2022), Scientific Data; `pages` holds the article number.
@article{tampu2022inflation,
  author  = {Tampu, Iulian Emil and Eklund, Anders and Haj-Hosseini, Neda},
  title   = {Inflation of Test Accuracy Due to Data Leakage in Deep Learning-Based Classification of {OCT} Images},
  year    = {2022},
  journal = {Scientific Data},
  volume  = {9},
  pages   = {580},
  doi     = {10.1038/s41597-022-01618-6}
}

% Pedregosa et al. (2011), JMLR. Accented letters are brace-wrapped BibTeX special
% characters so that initials (e.g. of {\'E}douard) are abbreviated correctly.
% Same work as pedregosa2011sklearn later in this file; both keys are kept
% because either may be cited.
@article{pedregosa2011scikit,
  title   = {Scikit-learn: Machine Learning in {Python}},
  author  = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2825--2830},
  year    = {2011},
  url     = {https://jmlr.org/papers/v12/pedregosa11a.html}
}

% Stone (1974), Journal of the Royal Statistical Society, Series B.
@article{stone1974cross,
  author  = {Stone, Mervyn},
  title   = {Cross-Validatory Choice and Assessment of Statistical Predictions},
  year    = {1974},
  journal = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume  = {36},
  number  = {2},
  pages   = {111--133},
  doi     = {10.1111/j.2517-6161.1974.tb00994.x}
}

% Bousquet and Elisseeff (2002), JMLR. The accent is written as a brace-wrapped
% BibTeX special character, matching the form used by other entries in this file.
@article{bousquet2002stability,
  title   = {Stability and Generalization},
  author  = {Bousquet, Olivier and Elisseeff, Andr{\'e}},
  journal = {Journal of Machine Learning Research},
  volume  = {2},
  pages   = {499--526},
  year    = {2002},
  url     = {https://jmlr.org/papers/v2/bousquet02a.html}
}

% Buitinck et al. (2013), ECML PKDD workshop paper. The accent is written as a
% brace-wrapped BibTeX special character, matching other entries in this file.
@inproceedings{buitinck2013sklearn,
  title     = {{API} Design for Machine Learning Software: Experiences from the Scikit-Learn Project},
  author    = {Buitinck, Lars and Louppe, Gilles and Blondel, Mathieu and Pedregosa, Fabian and Mueller, Andreas and Grisel, Olivier and Niculae, Vlad and Prettenhofer, Peter and Gramfort, Alexandre and Grobler, Jaques and Layton, Robert and VanderPlas, Jake and Joly, Arnaud and Holt, Brian and Varoquaux, Ga{\"e}l},
  booktitle = {ECML PKDD Workshop: Languages for Data Mining and Machine Learning},
  pages     = {108--122},
  year      = {2013},
  url       = {https://arxiv.org/abs/1309.0238}
}

% mlr3pipelines (JMLR, 2021). The key starts with "bischl" although the first
% listed author is Binder; the key is left as is because documents cite it.
@article{bischl2021mlr3pipelines,
  author  = {Binder, Martin and Pfisterer, Florian and Lang, Michel and Schneider, Lona and Kotthoff, Lars and Bischl, Bernd},
  title   = {mlr3pipelines: Flexible Machine Learning Pipelines in {R}},
  year    = {2021},
  journal = {Journal of Machine Learning Research},
  volume  = {22},
  number  = {184},
  pages   = {1--7},
  url     = {https://jmlr.org/papers/v22/21-0206.html}
}

% Cawley and Talbot (2010), JMLR.
@article{cawley2010overfitting,
  author  = {Cawley, Gavin C. and Talbot, Nicola L. C.},
  title   = {On Over-fitting in Model Selection and Subsequent Selection Bias in Performance Evaluation},
  year    = {2010},
  journal = {Journal of Machine Learning Research},
  volume  = {11},
  pages   = {2079--2107},
  url     = {https://jmlr.org/papers/v11/cawley10a.html}
}

% Bertin (1967), book. The accented letter in the title is brace-wrapped so
% BibTeX treats it as one character, matching other entries in this file.
@book{bertin1967semiologie,
  title     = {S{\'e}miologie Graphique},
  author    = {Bertin, Jacques},
  year      = {1967},
  publisher = {Mouton/Gauthier-Villars},
  address   = {Paris}
}

% Chomsky (1957), book published by Mouton.
@book{chomsky1957syntactic,
  author    = {Chomsky, Noam},
  title     = {Syntactic Structures},
  year      = {1957},
  publisher = {Mouton},
  address   = {The Hague}
}

% Codd (1970), Communications of the ACM.
@article{codd1970relational,
  author  = {Codd, Edgar F.},
  title   = {A Relational Model of Data for Large Shared Data Banks},
  year    = {1970},
  journal = {Communications of the ACM},
  volume  = {13},
  number  = {6},
  pages   = {377--387},
  doi     = {10.1145/362384.362685}
}

% Drobnjakovic et al. (2024), TASE paper. The arXiv id is stored in eprint fields
% (same scheme as kapoor2022leakage) instead of `note`; accents are brace-wrapped
% BibTeX special characters.
% NOTE(review): confirm the title wording against the DOI record.
@inproceedings{drobnjakovic2024abstract,
  title         = {Abstract Interpretation for Data Leakage Detection in Machine Learning Pipelines},
  author        = {Drobnjakovi{\'c}, Filip and Suboti{\'c}, Pavle and Urban, Caterina},
  booktitle     = {Theoretical Aspects of Software Engineering (TASE 2024)},
  year          = {2024},
  doi           = {10.1007/978-3-031-64626-3_7},
  archivePrefix = {arXiv},
  eprint        = {2211.16073}
}

% Drori et al., arXiv preprint. The arXiv id is stored in eprint fields plus a URL
% (same scheme as kapoor2022leakage) instead of free text in `note`.
@misc{drori2019alphad3m,
  title         = {{AlphaD3M}: Machine Learning Pipeline Synthesis and {AutoML}},
  author        = {Drori, Iddo and Krishnamurthy, Yamuna and Rampin, Remi and {de Paula Lourenco}, Raoni and Ono, Jorge Piazentin and Cho, Kyunghyun and Silva, Claudio and Freire, Juliana},
  year          = {2021},
  archivePrefix = {arXiv},
  eprint        = {2111.02508},
  url           = {https://arxiv.org/abs/2111.02508}
}

% Hastie, Tibshirani and Friedman (2009), second edition, Springer.
@book{hastie2009elements,
  author    = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
  title     = {The Elements of Statistical Learning: Data Mining, Inference, and Prediction},
  year      = {2009},
  edition   = {2nd},
  publisher = {Springer},
  address   = {New York},
  url       = {https://hastie.su.domains/ElemStatLearn/}
}

% Kapoor and Narayanan, living survey web resource (dated 2025 here).
@misc{kapoor2025living,
  author = {Kapoor, Sayash and Narayanan, Arvind},
  title  = {Leakage and the Reproducibility Crisis in {ML}-Based Science --- Living Survey},
  year   = {2025},
  url    = {https://reproducible.cs.princeton.edu},
  note   = {648 papers across 30 fields as of May 2024; continuously updated}
}

% Kapoor et al. (2024), Science Advances; `pages` holds the article id.
@article{kapoor2024reforms,
  author  = {Kapoor, Sayash and Cantrell, Emily M. and Peng, Kenny and Pham, Thanh Hien and Bail, Christopher A. and Gundersen, Odd Erik and Hofman, Jake M. and Hullman, Jessica and Lones, Michael A. and Malik, Momin M. and Nanayakkara, Priyanka and Poldrack, Russell A. and Raji, Inioluwa Deborah and Roberts, Michael and Salganik, Matthew J. and Serra-Garcia, Marta and Stewart, Brandon M. and Vandewiele, Gilles and Narayanan, Arvind},
  title   = {{REFORMS}: Consensus-Based Recommendations for Machine-Learning-Based Science},
  year    = {2024},
  journal = {Science Advances},
  volume  = {10},
  number  = {18},
  pages   = {eadk3452},
  doi     = {10.1126/sciadv.adk3452}
}

% Kaufman et al. (2012), ACM Transactions on Knowledge Discovery from Data.
@article{kaufman2012leakage,
  author  = {Kaufman, Shachar and Rosset, Saharon and Perlich, Claudia and Stitelman, Ori},
  title   = {Leakage in Data Mining: Formulation, Detection, and Avoidance},
  year    = {2012},
  journal = {ACM Transactions on Knowledge Discovery from Data},
  volume  = {6},
  number  = {4},
  pages   = {1--21},
  doi     = {10.1145/2382577.2382579}
}

% Kuhn and Silge (2022), book with a companion website.
@book{kuhn2022tidy,
  author    = {Kuhn, Max and Silge, Julia},
  title     = {Tidy Modeling with {R}},
  year      = {2022},
  publisher = {O'Reilly Media},
  address   = {Sebastopol, CA},
  url       = {https://www.tmwr.org}
}

% Riley et al. (2019), Statistics in Medicine; Part I of the series.
@article{riley2019minimum,
  author  = {Riley, Richard D. and Snell, Kym I. E. and Ensor, Joie and Burke, Danielle L. and Harrell, Frank E. and Moons, Karel G. M. and Collins, Gary S.},
  title   = {Minimum Sample Size for Developing a Multivariable Prediction Model: Part {I} --- Continuous Outcomes},
  year    = {2019},
  journal = {Statistics in Medicine},
  volume  = {38},
  number  = {7},
  pages   = {1262--1275},
  doi     = {10.1002/sim.7993}
}

% Myers (1999), POPL proceedings.
@inproceedings{myers1999jflow,
  author    = {Myers, Andrew C.},
  title     = {{JFlow}: Practical Mostly-Static Information Flow Control},
  year      = {1999},
  booktitle = {Proceedings of the 26th ACM SIGPLAN-SIGACT Symposium on Principles of Programming Languages (POPL)},
  pages     = {228--241},
  doi       = {10.1145/292540.292561}
}

% Rosenblatt et al. (2024), Nature Communications; `pages` holds the article number.
@article{rosenblatt2024data,
  author  = {Rosenblatt, Matthew and Tejavibulya, Link and Jiang, Rongtao and Noble, Stephanie and Scheinost, Dustin},
  title   = {Data Leakage Inflates Prediction Performance in Connectome-Based Machine Learning Models},
  year    = {2024},
  journal = {Nature Communications},
  volume  = {15},
  pages   = {1829},
  doi     = {10.1038/s41467-024-46150-w}
}

% Roth (2022), doctoral thesis. The umlaut in `school` is brace-wrapped, the same
% accent form other entries in this file use.
@phdthesis{roth2022biased,
  title  = {Biased Machines in the Realm of Politics},
  author = {Roth, Simon},
  year   = {2022},
  school = {Universit{\"a}t Konstanz},
  type   = {Dr.\ rer.\ soc.\ dissertation},
  url    = {https://kops.uni-konstanz.de/handle/123456789/59732}
}

% Roth (2026), preprint with a Zenodo DOI. Typed as misc because an article
% entry requires a `journal`, which this preprint does not have.
@misc{roth2026landscape,
  title        = {Which Leakage Types Matter? A Quantitative Landscape Across 2,047 Benchmark Datasets},
  author       = {Roth, Simon},
  year         = {2026},
  howpublished = {Zenodo},
  doi          = {10.5281/zenodo.19406148},
  note         = {Preprint}
}

% Roth (2026), manuscript in preparation; `note` is required for this entry type.
@unpublished{roth2026shortpath,
  author = {Roth, Simon},
  title  = {The Shortest Path Leaks: How {LLM}-Generated {ML} Pipelines Inherit and Amplify Data Leakage},
  year   = {2026},
  note   = {In preparation}
}

% van de Mortel and van Wingen (2025), Molecular Psychiatry; no volume or pages recorded.
@article{vandemortel2025leakage,
  author  = {{van de Mortel}, Thomas F. and {van Wingen}, Guido A.},
  title   = {Data Leakage in Machine Learning Studies Creep into Meta-Analytic Estimates},
  year    = {2025},
  journal = {Molecular Psychiatry},
  doi     = {10.1038/s41380-025-03336-y}
}

% van der Ploeg et al. (2014), BMC Medical Research Methodology; `pages` holds the article number.
@article{vanderploeg2014modern,
  author  = {{van der Ploeg}, Tjeerd and Austin, Peter C. and Steyerberg, Ewout W.},
  title   = {Modern Modelling Techniques Are Data Hungry: A Simulation Study for Predicting Dichotomous Endpoints},
  year    = {2014},
  journal = {BMC Medical Research Methodology},
  volume  = {14},
  pages   = {137},
  doi     = {10.1186/1471-2288-14-137}
}

% Wickham (2010), Journal of Computational and Graphical Statistics.
@article{wickham2010layered,
  author  = {Wickham, Hadley},
  title   = {A Layered Grammar of Graphics},
  year    = {2010},
  journal = {Journal of Computational and Graphical Statistics},
  volume  = {19},
  number  = {1},
  pages   = {3--28},
  doi     = {10.1198/jcgs.2009.07098}
}

% Wilkinson (1999), Springer book in the Statistics and Computing series.
@book{wilkinson1999grammar,
  author    = {Wilkinson, Leland},
  title     = {The Grammar of Graphics},
  year      = {1999},
  series    = {Statistics and Computing},
  publisher = {Springer},
  address   = {New York}
}

% Strom and Yemini (1986), IEEE Transactions on Software Engineering.
@article{strom1986typestate,
  author  = {Strom, Robert E. and Yemini, Shaula},
  title   = {Typestate: A Programming Language Concept for Enhancing Software Reliability},
  year    = {1986},
  journal = {IEEE Transactions on Software Engineering},
  volume  = {SE-12},
  number  = {1},
  pages   = {157--171},
  doi     = {10.1109/TSE.1986.6312929}
}

% Smith et al. (2020), SIGMOD. The key and arXiv id 1905.08942 identify the
% Machine Learning Bazaar paper, so title, pages and DOI are aligned with that
% work; the arXiv id is stored in eprint fields instead of `note`.
% NOTE(review): confirm pages and DOI against the ACM Digital Library record.
@inproceedings{smith2020mlbazaar,
  title         = {The Machine Learning Bazaar: Harnessing the {ML} Ecosystem for Effective System Development},
  author        = {Smith, Micah J. and Sala, Carles and Kanter, James Max and Veeramachaneni, Kalyan},
  booktitle     = {Proceedings of the 2020 ACM SIGMOD International Conference on Management of Data},
  pages         = {785--800},
  year          = {2020},
  doi           = {10.1145/3318464.3386146},
  archivePrefix = {arXiv},
  eprint        = {1905.08942}
}

% Yang et al. (2022), IEEE/ACM Automated Software Engineering proceedings.
@inproceedings{yang2022leakage,
  author    = {Yang, Chenyang and Brower-Sinning, Rachel A. and Lewis, Grace A. and Kaestner, Christian},
  title     = {Data Leakage in Notebooks: Static Detection and Better Processes},
  year      = {2022},
  booktitle = {Proceedings of the 37th {IEEE/ACM} International Conference on Automated Software Engineering},
  pages     = {1--12},
  doi       = {10.1145/3551349.3556918}
}

% Scriven (1967), chapter in an edited volume published by Rand McNally.
@incollection{scriven1967methodology,
  author    = {Scriven, Michael},
  title     = {The Methodology of Evaluation},
  year      = {1967},
  editor    = {Tyler, Ralph W. and Gagn{\'e}, Robert M. and Scriven, Michael},
  booktitle = {Perspectives of Curriculum Evaluation},
  pages     = {39--83},
  publisher = {Rand McNally},
  address   = {Chicago}
}

% Gumbel (1958), Columbia University Press; the note records a later reprint.
@book{gumbel1958statistics,
  author    = {Gumbel, Emil Julius},
  title     = {Statistics of Extremes},
  year      = {1958},
  publisher = {Columbia University Press},
  address   = {New York},
  note      = {Dover reprint 2004}
}

% Bates, Hastie and Tibshirani (2024), Journal of the American Statistical Association.
@article{bates2024crossvalidation,
  author  = {Bates, Stephen and Hastie, Trevor and Tibshirani, Robert},
  title   = {Cross-Validation: What Does It Estimate and How Well Does It Do It?},
  year    = {2024},
  journal = {Journal of the American Statistical Association},
  volume  = {119},
  number  = {546},
  pages   = {1434--1445},
  doi     = {10.1080/01621459.2023.2197686}
}

% Romano et al., PMLB v1.0 in Bioinformatics.
% NOTE(review): confirm that year 2021 is the intended year for volume 38, issue 3.
@article{romano2021pmlb,
  author  = {Romano, Joseph D. and Le, Trang T. and La Cava, William and Gregg, John T. and Goldberg, Daniel J. and Chakraborty, Praneel and Ray, Natasha L. and Himmelstein, Daniel and Fu, Weixuan and Moore, Jason H.},
  title   = {{PMLB} v1.0: An Open Source Dataset Collection for Benchmarking Machine Learning Methods},
  year    = {2021},
  journal = {Bioinformatics},
  volume  = {38},
  number  = {3},
  pages   = {878--880},
  doi     = {10.1093/bioinformatics/btab347}
}

% Roberts et al. (2017), Ecography. The DOI is given as its own field (value
% taken from the publisher URL) so DOI-aware styles can link it; the URL is kept.
@article{roberts2017cross,
  title   = {Cross-validation strategies for data with temporal, spatial, hierarchical, or phylogenetic structure},
  author  = {Roberts, David R. and Bahn, Volker and Ciuti, Simone and Boyce, Mark S. and Elith, Jane and Guillera-Arroita, Gurutzeta and Hauenstein, Severin and Lahoz-Monfort, Jos{\'e} J. and Schr{\"o}der, Boris and Thuiller, Wilfried and Warton, David I. and Wintle, Brendan A. and Hartig, Florian and Dormann, Carsten F.},
  journal = {Ecography},
  volume  = {40},
  pages   = {913--929},
  year    = {2017},
  doi     = {10.1111/ecog.02881},
  url     = {https://nsojournals.onlinelibrary.wiley.com/doi/10.1111/ecog.02881}
}

% Valavi et al. (2019), Methods in Ecology and Evolution. The DOI is given as its
% own field (value taken from the publisher URL); the URL is kept.
@article{valavi2019blockcv,
  title   = {block{CV}: An {R} package for generating spatially or environmentally separated folds for k-fold cross-validation of species distribution models},
  author  = {Valavi, Roozbeh and Elith, Jane and Lahoz-Monfort, Jos{\'e} J. and Guillera-Arroita, Gurutzeta},
  journal = {Methods in Ecology and Evolution},
  volume  = {10},
  pages   = {225--232},
  year    = {2019},
  doi     = {10.1111/2041-210X.13107},
  url     = {https://besjournals.onlinelibrary.wiley.com/doi/10.1111/2041-210X.13107}
}

% Kapoor and Narayanan (2023), Patterns; `pages` holds the article number.
@article{kapoor2023leakage,
  author    = {Kapoor, Sayash and Narayanan, Arvind},
  title     = {Leakage and the Reproducibility Crisis in Machine-Learning-Based Science},
  year      = {2023},
  journal   = {Patterns},
  volume    = {4},
  number    = {9},
  pages     = {100804},
  publisher = {Elsevier},
  doi       = {10.1016/j.patter.2023.100804}
}

% Kapoor and Narayanan (2022), arXiv preprint identified through eprint fields and URL.
@misc{kapoor2022leakage,
  author        = {Kapoor, Sayash and Narayanan, Arvind},
  title         = {Leakage and the Reproducibility Crisis in {ML}-Based Science},
  year          = {2022},
  eprint        = {2207.07048},
  archivePrefix = {arXiv},
  url           = {https://arxiv.org/abs/2207.07048},
  note          = {329 papers in the original preprint}
}

% Ambroise and McLachlan (2002), Proceedings of the National Academy of Sciences.
@article{ambroise2002selection,
  author  = {Ambroise, Christophe and McLachlan, Geoffrey J.},
  title   = {Selection Bias in Gene Extraction on the Basis of Microarray Gene-Expression Data},
  year    = {2002},
  journal = {Proceedings of the National Academy of Sciences},
  volume  = {99},
  number  = {10},
  pages   = {6562--6566},
  doi     = {10.1073/pnas.102102699}
}

% Varma and Simon (2006), BMC Bioinformatics; `pages` holds the article number.
@article{varma2006bias,
  author  = {Varma, Sudhir and Simon, Richard},
  title   = {Bias in Error Estimation when Using Cross-Validation for Model Selection},
  year    = {2006},
  journal = {BMC Bioinformatics},
  volume  = {7},
  pages   = {91},
  doi     = {10.1186/1471-2105-7-91}
}

% Vandewiele et al. (2021), Artificial Intelligence in Medicine; `pages` holds the article number.
@article{vandewiele2021overly,
  author  = {Vandewiele, Gilles and Dehaene, Isabelle and Kov{\'a}cs, Gy{\"o}rgy and Sterckx, Lucas and Janssens, Olivier and Ongenae, Femke and {De Backere}, Femke and {De Turck}, Filip and Roelens, Kristien and Decruyenaere, Johan and {Van Hoecke}, Sofie and Demeester, Thomas},
  title   = {Overly Optimistic Prediction Results on Imbalanced Data: A Case Study of Flaws and Benefits when Applying Over-Sampling},
  year    = {2021},
  journal = {Artificial Intelligence in Medicine},
  volume  = {111},
  pages   = {101987},
  doi     = {10.1016/j.artmed.2020.101987}
}

% Bengio and Grandvalet (2004), JMLR. The key is mixed-case; cite it exactly as written.
@article{bengio2004noUnbiased,
  author  = {Bengio, Yoshua and Grandvalet, Yves},
  title   = {No Unbiased Estimator of the Variance of {K}-Fold Cross-Validation},
  year    = {2004},
  journal = {Journal of Machine Learning Research},
  volume  = {5},
  pages   = {1089--1105},
  url     = {https://jmlr.org/papers/v5/grandvalet04a.html}
}

% Varoquaux (2018), NeuroImage. The accent is written as a brace-wrapped BibTeX
% special character, matching the form used by other entries in this file.
@article{varoquaux2018crossvalidation,
  title   = {Cross-validation Failure: Small Sample Sizes Lead to Large Error Bars},
  author  = {Varoquaux, Ga{\"e}l},
  journal = {NeuroImage},
  volume  = {180},
  pages   = {68--77},
  year    = {2018},
  doi     = {10.1016/j.neuroimage.2017.06.061}
}

% Vanschoren et al., ACM SIGKDD Explorations Newsletter. The accented i is written
% as a brace-wrapped BibTeX special character using the dotless i.
@article{vanschoren2014openml,
  title   = {{OpenML}: Networked Science in Machine Learning},
  author  = {Vanschoren, Joaquin and van Rijn, Jan N. and Bischl, Bernd and Torgo, Lu{\'\i}s},
  journal = {ACM SIGKDD Explorations Newsletter},
  volume  = {15},
  number  = {2},
  pages   = {49--60},
  year    = {2013},
  doi     = {10.1145/2641190.2641198}
}

% Lakens (2013), Frontiers in Psychology. The accent is written as a brace-wrapped
% BibTeX special character, matching the form used by other entries in this file.
@article{lakens2013calculating,
  title   = {Calculating and Reporting Effect Sizes to Facilitate Cumulative Science: A Practical Primer for t-Tests and {ANOVAs}},
  author  = {Lakens, Dani{\"e}l},
  journal = {Frontiers in Psychology},
  volume  = {4},
  pages   = {863},
  year    = {2013},
  doi     = {10.3389/fpsyg.2013.00863}
}

% Kruschke (2018), Advances in Methods and Practices in Psychological Science.
@article{kruschke2018rejecting,
  author  = {Kruschke, John K.},
  title   = {Rejecting or Accepting Parameter Values in {Bayesian} Estimation},
  year    = {2018},
  journal = {Advances in Methods and Practices in Psychological Science},
  volume  = {1},
  number  = {2},
  pages   = {270--280},
  doi     = {10.1177/2515245918771304}
}

% Drobnjakovic et al. (2025), Science of Computer Programming. Accents are written
% as brace-wrapped BibTeX special characters, matching other entries in this file.
@article{drobnjakovic2025abstract,
  title   = {Static Analysis by Abstract Interpretation Against Data Leakage in Machine Learning},
  author  = {Drobnjakovi{\'c}, Filip and Suboti{\'c}, Pavle and Urban, Caterina},
  journal = {Science of Computer Programming},
  year    = {2025},
  doi     = {10.1016/j.scico.2025.103338}
}

% Truong et al. (2025), arXiv preprint. The arXiv id is stored in eprint fields plus
% a URL (same scheme as kapoor2022leakage) instead of free text in `note`.
@misc{truong2025leakagedetector2,
  title         = {{LeakageDetector} 2.0: Analyzing Data Leakage in {Jupyter}-Driven Machine Learning Pipelines},
  author        = {Truong, Owen and Zhang, Terrence and Marchareddy, Arnav and Lee, Ryan and Busold, Jeffery and Socas, Michael and AlOmar, Eman Abdullah},
  year          = {2025},
  archivePrefix = {arXiv},
  eprint        = {2509.15971},
  url           = {https://arxiv.org/abs/2509.15971}
}

% Apicella et al. (2025), Artificial Intelligence Review. The accent is written as a
% brace-wrapped BibTeX special character, matching other entries in this file.
@article{apicella2025button,
  title   = {Don't Push the Button! Exploring Data Leakage Risks in Machine Learning and Transfer Learning},
  author  = {Apicella, Andrea and Isgr{\`o}, Francesco and Prevete, Roberto},
  journal = {Artificial Intelligence Review},
  year    = {2025},
  publisher = {Springer},
  doi     = {10.1007/s10462-025-11326-3}
}

% Becker and Recamonde-Mendoza (2025), BRACIS proceedings in the LNCS series.
@inproceedings{becker2025gap,
  author    = {Becker, Augusto Exenberger and Recamonde-Mendoza, Mariana},
  title     = {Mind the Gap: Investigating the Impact of Data Leakage on Machine Learning Predictive Models},
  year      = {2025},
  booktitle = {Brazilian Conference on Intelligent Systems (BRACIS)},
  series    = {LNCS},
  volume    = {16180},
  publisher = {Springer}
}

% Roth (2026), preprint. Typed as misc because an article entry requires a
% `journal`; the arXiv id is stored in eprint fields plus a URL (same scheme as
% kapoor2022leakage) instead of free text in `note`.
@misc{roth2026grammar,
  title         = {A Grammar of Machine Learning Workflows: Rejecting Data Leakage at Call Time},
  author        = {Roth, Simon},
  year          = {2026},
  doi           = {10.5281/zenodo.19406355},
  archivePrefix = {arXiv},
  eprint        = {2603.10742},
  url           = {https://arxiv.org/abs/2603.10742},
  note          = {Preprint}
}

% Raschka, arXiv preprint. Typed as misc with eprint fields (same scheme as
% kapoor2022leakage) rather than carrying the arXiv id in `journal`.
@misc{raschka2020modelevaluation,
  title         = {Model Evaluation, Model Selection, and Algorithm Selection in Machine Learning},
  author        = {Raschka, Sebastian},
  year          = {2020},
  archivePrefix = {arXiv},
  eprint        = {1811.12808},
  doi           = {10.48550/arXiv.1811.12808}
}

% Lones (2024), Patterns; `pages` holds the article number.
@article{lones2024pitfalls,
  author  = {Lones, Michael A.},
  title   = {Avoiding Common Machine Learning Pitfalls},
  year    = {2024},
  journal = {Patterns},
  volume  = {5},
  number  = {10},
  pages   = {101046},
  doi     = {10.1016/j.patter.2024.101046}
}

% Bischl et al. (2023), WIREs Data Mining and Knowledge Discovery; `pages` holds the article id.
@article{bischl2023hpo,
  author  = {Bischl, Bernd and Binder, Martin and Lang, Michel and Pielok, Tobias and Richter, Jakob and Coors, Stefan and Thomas, Janek and Ullmann, Theresa and Becker, Marc and Boulesteix, Anne-Laure and Deng, Difan and Lindauer, Marius},
  title   = {Hyperparameter Optimization: Foundations, Algorithms, Best Practices and Open Challenges},
  year    = {2023},
  journal = {WIREs Data Mining and Knowledge Discovery},
  volume  = {13},
  number  = {2},
  pages   = {e1484},
  doi     = {10.1002/widm.1484}
}

% Tsamardinos, Greasidou and Borboudakis (2018), Machine Learning.
@article{tsamardinos2018bootstrap,
  author  = {Tsamardinos, Ioannis and Greasidou, Elissavet and Borboudakis, Giorgos},
  title   = {Bootstrapping the Out-of-Sample Predictions for Efficient and Accurate Cross-Validation},
  year    = {2018},
  journal = {Machine Learning},
  volume  = {107},
  number  = {12},
  pages   = {1895--1922},
  doi     = {10.1007/s10994-018-5714-4}
}

% Pedregosa et al. (2011), JMLR. Accented letters are brace-wrapped BibTeX special
% characters so that initials (e.g. of {\'E}douard) are abbreviated correctly.
% Same work as pedregosa2011scikit earlier in this file; both keys are kept
% because either may be cited.
@article{pedregosa2011sklearn,
  title   = {Scikit-learn: Machine Learning in {Python}},
  author  = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and Vanderplas, Jake and Passos, Alexandre and Cournapeau, David and Brucher, Matthieu and Perrot, Matthieu and Duchesnay, {\'E}douard},
  journal = {Journal of Machine Learning Research},
  volume  = {12},
  pages   = {2825--2830},
  year    = {2011},
  url     = {https://jmlr.org/papers/v12/pedregosa11a.html}
}

% Guyon and Elisseeff (2003), JMLR. The accent is written as a brace-wrapped
% BibTeX special character, matching the form used by other entries in this file.
@article{guyon2003introduction,
  title   = {An Introduction to Variable and Feature Selection},
  author  = {Guyon, Isabelle and Elisseeff, Andr{\'e}},
  journal = {Journal of Machine Learning Research},
  volume  = {3},
  pages   = {1157--1182},
  year    = {2003},
  url     = {https://jmlr.org/papers/v3/guyon03a/guyon03a.pdf}
}

% Chawla et al. (2002), Journal of Artificial Intelligence Research.
@article{chawla2002smote,
  author  = {Chawla, Nitesh V. and Bowyer, Kevin W. and Hall, Lawrence O. and Kegelmeyer, W. Philip},
  title   = {{SMOTE}: Synthetic Minority Over-sampling Technique},
  year    = {2002},
  journal = {Journal of Artificial Intelligence Research},
  volume  = {16},
  pages   = {321--357},
  doi     = {10.1613/jair.953}
}

% Bergstra and Bengio (2012), JMLR.
@article{bergstra2012random,
  author  = {Bergstra, James and Bengio, Yoshua},
  title   = {Random Search for Hyper-Parameter Optimization},
  year    = {2012},
  journal = {Journal of Machine Learning Research},
  volume  = {13},
  pages   = {281--305},
  url     = {https://jmlr.org/papers/v13/bergstra12a.html}
}

% Sasse et al. (2025), Journal of Big Data; `pages` holds the article number.
% NOTE(review): verify the authors' given names against the DOI record.
@article{sasse2025featuretarget,
  author  = {Sasse, Simon and Nicolaisen-Sobesky, Eliana and Dukart, Juergen and Eickhoff, Simon B. and Gotz, Marlene and Hamdan, Sami and Komeyer, Vivien and Kulkarni, Kaustubh R. and Lahnakoski, Juha M. and Love, Bradley C. and Raimondo, Federico and Patil, Kaustubh R.},
  title   = {Overview of Leakage Scenarios in Supervised Machine Learning},
  year    = {2025},
  journal = {Journal of Big Data},
  volume  = {12},
  pages   = {41},
  doi     = {10.1186/s40537-025-01193-8}
}

% Nadeau and Bengio (2003), Machine Learning.
@article{nadeau2003inference,
  author  = {Nadeau, Claude and Bengio, Yoshua},
  title   = {Inference for the Generalization Error},
  year    = {2003},
  journal = {Machine Learning},
  volume  = {52},
  number  = {3},
  pages   = {239--281},
  doi     = {10.1023/A:1024068626366}
}

% Arlot and Celisse (2010), Statistics Surveys.
@article{arlot2010survey,
  author  = {Arlot, Sylvain and Celisse, Alain},
  title   = {A Survey of Cross-Validation Procedures for Model Selection},
  year    = {2010},
  journal = {Statistics Surveys},
  volume  = {4},
  pages   = {40--79},
  doi     = {10.1214/09-SS054}
}

% Dietterich (1998), Neural Computation.
@article{dietterich1998approximate,
  author  = {Dietterich, Thomas G.},
  title   = {Approximate Statistical Tests for Comparing Supervised Classification Learning Algorithms},
  year    = {1998},
  journal = {Neural Computation},
  volume  = {10},
  number  = {7},
  pages   = {1895--1923},
  doi     = {10.1162/089976698300017197}
}

% Simpson (1951), Journal of the Royal Statistical Society, Series B.
@article{simpson1951interpretation,
  title   = {The Interpretation of Interaction in Contingency Tables},
  author  = {Simpson, Edward H.},
  year    = {1951},
  journal = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume  = {13},
  number  = {2},
  pages   = {238--241},
  doi     = {10.1111/j.2517-6161.1951.tb00088.x}
}

% Benjamini and Hochberg (1995), Journal of the Royal Statistical Society, Series B.
@article{benjamini1995controlling,
  title   = {Controlling the False Discovery Rate: A Practical and Powerful Approach to Multiple Testing},
  author  = {Benjamini, Yoav and Hochberg, Yosef},
  year    = {1995},
  journal = {Journal of the Royal Statistical Society: Series B (Methodological)},
  volume  = {57},
  number  = {1},
  pages   = {289--300},
  doi     = {10.1111/j.2517-6161.1995.tb02031.x}
}

% Chen et al. (2021), arXiv preprint. Typed as misc with eprint fields plus a URL
% (same scheme as kapoor2022leakage) rather than carrying the arXiv id in `journal`.
@misc{chen2021evaluating,
  author        = {Chen, Mark and Tworek, Jerry and Jun, Heewoo and Yuan, Qiming and Pinto, Henrique Ponde de Oliveira and Kaplan, Jared and Edwards, Harri and Burda, Yuri and Joseph, Nicholas and Brockman, Greg and others},
  title         = {Evaluating Large Language Models Trained on Code},
  year          = {2021},
  archivePrefix = {arXiv},
  eprint        = {2107.03374},
  url           = {https://arxiv.org/abs/2107.03374}
}

% Austin et al. (2021), arXiv preprint. Typed as misc with eprint fields plus a URL:
% as a proceedings entry the arXiv id in `booktitle` rendered as "In arXiv preprint ...".
@misc{austin2021programsynthesis,
  author        = {Austin, Jacob and Odena, Augustus and Nye, Maxwell and Bosma, Maarten and Michalewski, Henryk and Dohan, David and Jiang, Ellen and Cai, Carrie and Terry, Michael and Le, Quoc and Sutton, Charles},
  title         = {Program Synthesis with Large Language Models},
  year          = {2021},
  archivePrefix = {arXiv},
  eprint        = {2108.07732},
  url           = {https://arxiv.org/abs/2108.07732}
}

% Li et al. (2022), Science. Page range and DOI added so the article can be located.
% NOTE(review): confirm pages and DOI against the publisher record.
@article{li2022competition,
  author  = {Li, Yujia and Choi, David and Chung, Junyoung and Kushman, Nate and Schrittwieser, Julian and Leblond, Remi and Eccles, Tom and Keeling, James and Gimeno, Felix and Dal Lago, Agustin and others},
  title   = {Competition-Level Code Generation with {AlphaCode}},
  journal = {Science},
  volume  = {378},
  number  = {6624},
  pages   = {1092--1097},
  year    = {2022},
  doi     = {10.1126/science.abq1158}
}

% Du et al. (2023), arXiv preprint. Typed as misc with eprint fields plus a URL
% (same scheme as kapoor2022leakage) rather than carrying the arXiv id in `journal`.
@misc{du2023classeval,
  author        = {Du, Xueying and Liu, Mingwei and Wang, Kaixin and Wang, Hanlin and Liu, Junwei and Chen, Yixuan and Feng, Jiayi and Sha, Chaofeng and Peng, Xin and Lou, Yiling},
  title         = {{ClassEval}: A Manually-Crafted Benchmark for Evaluating {LLMs} on Class-Level Code Generation},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2308.01861},
  url           = {https://arxiv.org/abs/2308.01861}
}

% Jimenez et al. (2023), arXiv preprint. Typed as misc with eprint fields plus a URL
% (same scheme as kapoor2022leakage); the middle initial now carries its period.
@misc{jimenez2023swebench,
  author        = {Jimenez, Carlos E. and Yang, John and Wettig, Alexander and Yao, Shunyu and Pei, Kexin and Press, Ofir and Narasimhan, Karthik},
  title         = {{SWE-bench}: Can Language Models Resolve Real-World {GitHub} Issues?},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2310.06770},
  url           = {https://arxiv.org/abs/2310.06770}
}

% Jesse et al. (2023), arXiv preprint. Typed as misc with eprint fields plus a URL
% (same scheme as kapoor2022leakage); the middle initial now carries its period.
@misc{jesse2023large,
  author        = {Jesse, Kevin and Ahmed, Toufique and Devanbu, Premkumar T. and Morgan, Emily},
  title         = {Large Language Models and Simple, Stupid Bugs},
  year          = {2023},
  archivePrefix = {arXiv},
  eprint        = {2303.11455},
  url           = {https://arxiv.org/abs/2303.11455}
}

% Pearce et al. (2022), IEEE Symposium on Security and Privacy.
@inproceedings{pearce2022asleep,
  title     = {Asleep at the Keyboard? Assessing the Security of {GitHub Copilot}'s Code Contributions},
  author    = {Pearce, Hammond and Ahmad, Baleegh and Tan, Benjamin and Dolan-Gavitt, Brendan and Karri, Ramesh},
  year      = {2022},
  booktitle = {IEEE Symposium on Security and Privacy}
}

% Wei et al. (2022), NeurIPS conference paper. Typed as a proceedings paper with the
% full proceedings title instead of an article whose `journal` was the acronym.
% NOTE(review): confirm volume and pages against the proceedings record.
@inproceedings{wei2022chain,
  author    = {Wei, Jason and Wang, Xuezhi and Schuurmans, Dale and Bosma, Maarten and Ichter, Brian and Xia, Fei and Chi, Ed and Le, Quoc and Zhou, Denny},
  title     = {Chain-of-Thought Prompting Elicits Reasoning in Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {35},
  pages     = {24824--24837},
  year      = {2022}
}

% Yao et al., NeurIPS conference paper. Typed as a proceedings paper with the full
% proceedings title instead of an article whose `journal` was the acronym; the
% middle initial now carries its period.
% NOTE(review): confirm volume, pages and year against the proceedings record.
@inproceedings{yao2024tree,
  author    = {Yao, Shunyu and Yu, Dian and Zhao, Jeffrey and Shafran, Izhak and Griffiths, Thomas L. and Cao, Yuan and Narasimhan, Karthik},
  title     = {Tree of Thoughts: Deliberate Problem Solving with Large Language Models},
  booktitle = {Advances in Neural Information Processing Systems},
  volume    = {36},
  pages     = {11809--11822},
  year      = {2024}
}

% Pierce (2002), MIT Press book. The middle initial now carries its period, and the
% publisher's city is given as in the other book entries of this file.
@book{pierce2002types,
  author    = {Pierce, Benjamin C.},
  title     = {Types and Programming Languages},
  publisher = {MIT Press},
  address   = {Cambridge, MA},
  year      = {2002}
}

% Baylor et al. (2017), KDD conference paper. The proceedings title is spelled out
% like the other conference entries in this file, and a locator is added.
% NOTE(review): confirm pages and DOI against the ACM Digital Library record.
@inproceedings{baylor2017tfx,
  author    = {Baylor, Denis and Breck, Eric and Cheng, Heng-Tze and Fiedel, Noah and Foo, Chuan Yu and Haque, Zakaria and Haykal, Salem and Ispir, Mustafa and Jain, Vihan and Koc, Levent and others},
  title     = {{TFX}: A {TensorFlow}-Based Production-Scale Machine Learning Platform},
  booktitle = {Proceedings of the 23rd {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining ({KDD})},
  pages     = {1387--1395},
  year      = {2017},
  doi       = {10.1145/3097983.3098021}
}

% Olsson et al. (2022), web article in the Transformer Circuits Thread. A URL is
% added because the entry otherwise carries no locator.
% NOTE(review): confirm the URL still resolves.
@article{olsson2022context,
  author  = {Olsson, Catherine and Elhage, Nelson and Nanda, Neel and Joseph, Nicholas and DasSarma, Nova and Henighan, Tom and Mann, Ben and Askell, Amanda and Bai, Yuntao and Chen, Anna and others},
  title   = {In-Context Learning and Induction Heads},
  journal = {Transformer Circuits Thread},
  year    = {2022},
  url     = {https://transformer-circuits.pub/2022/in-context-learning-and-induction-heads/index.html}
}