bunige-pagedjs-template/example/references.bib

366 lines
28 KiB
BibTeX
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

% Journal article (PubMed record, PMID stored in eprint): Bunch 2022, indirect reference intervals with an R pipeline.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{bunchIndirectReferenceIntervals2022,
  title = {Indirect Reference Intervals Using an {{R}} Pipeline},
  author = {Bunch, Dustin R.},
  date = {2022-04},
  journaltitle = {Journal of mass spectrometry and advances in the clinical lab},
  shortjournal = {J Mass Spectrom Adv Clin Lab},
  volume = {24},
  eprint = {35252947},
  eprinttype = {pmid},
  pages = {22--30},
  location = {Netherlands},
  issn = {2667-145X 2667-1468},
  doi = {10.1016/j.jmsacl.2022.02.004},
  abstract = {BACKGROUND: Indirect reference intervals require robust statistical approaches to separate the pathological and healthy values. This can be achieved with a data pipeline created in R, a freely available statistical programming language. METHODS: A data pipeline was created to ingest, partition, normalize, remove outliers, and identify reference intervals for testosterone (Testo; n ~=~7,207) and aspartate aminotransferase (AST; n ~=~5,882) using data sets from NHANES. RESULTS: The estimates for AST and Testo determined by this pipeline approximated current RIs. Care should be taken when using this pipeline as there are limitations that depend on the pathology of the analyte and the data set being used for RI estimation. CONCLUSIONS: R can be used to create a robust statistical reference interval pipeline.},
  langid = {english},
  pmcid = {PMC8889237},
  keywords = {ANOVA Analysis of variance,AST aspartate aminotransferase,CLSI Clinical Laboratory Standards Institute,cmu,EHR electronic health record,IFCC International Federation of Clinical Chemistry and Laboratory Medicine,LC-MS/MS Liquid chromatography tandem mass spectrometry,LIS Laboratory informatics system,markdown,Mixtools,non lu,pubmed,R markdown,R markdown tutorial,Reference interval,RI reference interval,SDI Standard deviation index,SDR Standard deviation ratio,Testo Testosterone,TukeyHSD Tukey multiple pairwise-comparisons,z5 Critical z-score},
  file = {/home/igor/Zotero/storage/YYP9JQTY/Bunch_2022_Indirect reference intervals using an R pipeline.pdf},
}
% Journal article (PubMed record, PMID stored in eprint): Chapman 2022, Expected A Posteriori scoring in PROMIS.
% The registered-trademark sign is written directly rather than as the parenthesised export artifact;
% the attachment path in the file field is kept verbatim because it names a file on disk.
@article{chapmanExpectedPosterioriScoring2022,
  title = {Expected a Posteriori Scoring in {{PROMIS}}®},
  author = {Chapman, Robert},
  date = {2022-06-03},
  journaltitle = {Journal of patient-reported outcomes},
  shortjournal = {J Patient Rep Outcomes},
  volume = {6},
  number = {1},
  eprint = {35657454},
  eprinttype = {pmid},
  pages = {59},
  location = {Germany},
  issn = {2509-8020},
  doi = {10.1186/s41687-022-00464-9},
  abstract = {BACKGROUND: The Patient-Reported Outcome Measurement Information System® (PROMIS®) was developed to reliably measure health-related quality of life using the patient's voice. To achieve these aims, PROMIS utilized Item Response Theory methods in its development, validation and implementation. PROMIS measures are typically scored using a specific method to calculate scores, called Expected A Posteriori estimation. BODY: Expected A Posteriori scoring methods are flexible, produce accurate scores and can be efficiently calculated by statistical software. This work seeks to make Expected A Posteriori scoring methods transparent and accessible to a larger audience through description, graphical demonstration and examples. Further applications and practical considerations of Expected A Posteriori scoring are presented and discussed. All materials used in this paper are made available through the R Markdown reproducibility framework and are intended to be reviewed and reused. Commented statistical code for the calculation of Expected A Posteriori scores is included. CONCLUSION: This work seeks to provide the reader with a summary and visualization of the operation of Expected A Posteriori scoring, as implemented in PROMIS. As PROMIS is increasingly adopted and implemented, this work will provide a basis for making psychometric methods more accessible to the PROMIS user base.},
  langid = {english},
  pmcid = {PMC9166925},
  keywords = {cmu,markdown,non lu,pubmed,R markdown},
  file = {/home/igor/Zotero/storage/7R2KLGBS/Chapman_2022_Expected a posteriori scoring in PROMIS(®).pdf},
}
% Web page: CommonMark project home page (undated; access date in urldate).
% The author is an organisation, so it is double-braced to be treated as one indivisible name.
@online{commonmarkCommonMark,
  title = {{{CommonMark}}},
  author = {{CommonMark}},
  url = {https://commonmark.org/},
  urldate = {2023-04-25},
  langid = {english},
  organization = {CommonMark},
  keywords = {cmu,non lu},
  file = {/home/igor/Zotero/storage/ESNTHT9H/commonmark.org.html},
}
% Journal article (PubMed record, PMID stored in eprint): Considine and Salek 2019, R Markdown reporting template for metabolomics data analysis.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{considineToolEncourageMinimum2019,
  title = {A {{Tool}} to {{Encourage Minimum Reporting Guideline Uptake}} for {{Data Analysis}} in {{Metabolomics}}},
  author = {Considine, Elizabeth C. and Salek, Reza M.},
  date = {2019-03-05},
  journaltitle = {Metabolites},
  shortjournal = {Metabolites},
  volume = {9},
  number = {3},
  eprint = {30841575},
  eprinttype = {pmid},
  location = {Switzerland},
  issn = {2218-1989},
  doi = {10.3390/metabo9030043},
  abstract = {Despite the proposal of minimum reporting guidelines for metabolomics over a decade ago, reporting on the data analysis step in metabolomics studies has been shown to be unclear and incomplete. Major omissions and a lack of logical flow render the data analysis' sections in metabolomics studies impossible to follow, and therefore replicate or even imitate. Here, we propose possible reasons why the original reporting guidelines have had poor adherence and present an approach to improve their uptake. We present in this paper an R markdown reporting template file that guides the production of text and generates workflow diagrams based on user input. This R Markdown template contains, as an example in this instance, a set of minimum information requirements specifically for the data pre-treatment and data analysis section of biomarker discovery metabolomics studies, (gleaned directly from the original proposed guidelines by Goodacre at al). These minimum requirements are presented in the format of a questionnaire checklist in an R markdown template file. The R Markdown reporting template proposed here can be presented as a starting point to encourage the data analysis section of a metabolomics manuscript to have a more logical presentation and to contain enough information to be understandable and reusable. The idea is that these guidelines would be open to user feedback, modification and updating by the metabolomics community via GitHub.},
  langid = {english},
  pmcid = {PMC6468746},
  keywords = {cmu,data analysis,markdown,minimum guidelines,non lu,pubmed,R markdown,reporting,reproducibility},
  file = {/home/igor/Zotero/storage/RSRUDDYS/Considine_Salek_2019_A Tool to Encourage Minimum Reporting Guideline Uptake for Data Analysis in.pdf},
}
% Journal article (PubMed record, PMID stored in eprint): Daas, Förster and Pahle 2022, dynamic publication media with CoRC.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{daasDynamicPublicationMedia2022,
  title = {Dynamic Publication Media with the {{COPASI R Connector}} ({{CoRC}})},
  author = {Daas, Johanna C. J. and Förster, Jonas D. and Pahle, Jürgen},
  date = {2022-06},
  journaltitle = {Mathematical biosciences},
  shortjournal = {Math Biosci},
  volume = {348},
  eprint = {35452633},
  eprinttype = {pmid},
  pages = {108822},
  location = {United States},
  issn = {1879-3134 0025-5564},
  doi = {10.1016/j.mbs.2022.108822},
  abstract = {In this article we show how dynamic publication media and the COPASI R Connector (CoRC) can be combined in a natural and synergistic way to communicate (biochemical) models. Dynamic publication media are becoming a popular tool for authors to effectively compose and publish their work. They are built from templates and the final documents are created dynamically. In addition, they can also be interactive. Working with dynamic publication media is made easy with the programming environment R via its integration with tools such as R Markdown, Jupyter and Shiny. Additionally, the COmplex PAthway SImulator COPASI (http://www.copasi.org), a widely used biochemical modelling toolkit, is available in R through the use of the COPASI R Connector (CoRC, https://jpahle.github.io/CoRC). Models are a common tool in the mathematical biosciences, in particular kinetic models of biochemical networks in (computational) systems biology. We focus on three application areas of dynamic publication media and CoRC: Documentation (reproducible workflows), Teaching (creating self-paced lessons) and Science Communication (immersive and engaging presentation). To illustrate these, we created six dynamic document examples in the form of R Markdown and Jupyter notebooks, hosted on the platforms GitHub, shinyapps.io, Google Colaboratory. Having code and output in one place, creating documents in template-form and the option of interactivity make the combination of dynamic documents and CoRC a versatile tool. All our example documents are freely available at https://jpahle.github.io/DynamiCoRC under the Creative Commons BY 4.0 licence.},
  langid = {english},
  keywords = {*Software,*Systems Biology,cmu,COPASI,CoRC,Dynamic publication media,Jupyter,Kinetics,markdown,non lu,pubmed,R markdown,Systems biology},
  file = {/home/igor/Zotero/storage/WJ5FCFZY/Daas et al_2022_Dynamic publication media with the COPASI R Connector (CoRC).pdf},
}
% Workshop material (type Atelier) presented at So-Mate 2022, Pau: Deletraz and Rabaud.
% langid uses the language name "french", matching the other French entries that already use it.
@unpublished{deletrazModeTexteMarkdown2022,
  type = {Atelier},
  title = {En mode texte\,: Markdown, Stylo, Pandoc, Notebook…},
  author = {Deletraz, Gaëlle and Rabaud, Julien},
  date = {2022},
  url = {https://markdown-somate2022.netlify.app},
  urldate = {2023-04-17},
  eventtitle = {So-Mate 2022},
  langid = {french},
  venue = {Pau},
  keywords = {cmu,markdown,non lu,pandoc,Publication scientifique},
}
% Mastodon post by Cédric Eyssette announcing his slide deck on using Markdown for everything.
% The author field holds only the personal name (the account handle remains visible in url), and
% sentences, link and hashtags in title/abstract are separated by spaces instead of being run together.
@online{eyssetteUtiliserMarkdownPour2023,
  type = {Mastodon post},
  title = {Utiliser le Markdown pour tout faire. Le diaporama (fait en markdown bien sûr !) qui m'a servi de support lors de mon atelier pour la Journée…},
  author = {Eyssette, Cédric},
  date = {2023-04-07},
  url = {https://scholar.social/@eyssette/110158455766516456},
  urldate = {2023-04-17},
  abstract = {Utiliser le Markdown pour tout faire. Le diaporama (fait en markdown bien sûr !) qui m'a servi de support lors de mon atelier pour la Journée du Libre Éducatif 2023. https://eyssette.forge.aeif.fr/marp-slides/slides/2022-2023/utiliser-le-markdown-pour-tout-faire \#Markdown \#JDLE \#JDLE2023 \#TeamEduc \#MastoProf},
  langid = {french},
  organization = {Mastodon},
  keywords = {cmu,fediverse,markdown,mastodon,non lu},
  file = {/home/igor/Zotero/storage/7FYFNED6/110158455766516456.html},
}
% Slide deck by Cédric Eyssette on using Markdown for everything (2023).
% langid uses the language name "french", matching the other French entries that already use it.
@unpublished{eyssetteUtiliserMarkdownPour2023a,
  title = {Utiliser le markdown pour tout faire},
  author = {Eyssette, Cédric},
  date = {2023},
  url = {https://eyssette.forge.aeif.fr/marp-slides/slides/2022-2023/utiliser-le-markdown-pour-tout-faire},
  urldate = {2023-04-17},
  langid = {french},
  keywords = {cmu,non lu},
  file = {/home/igor/Zotero/storage/I7CTE3X5/utiliser-le-markdown-pour-tout-faire.html},
}
% Web page: Markdown guide by Marien Fressinaud, published on flus.fr (2022).
% langid uses the language name "french", matching the other French entries that already use it.
@online{fressinaudGuideMarkdown2022,
  type = {Site d'une application web},
  title = {Guide Markdown},
  author = {Fressinaud, Marien},
  date = {2022-07-11},
  url = {https://flus.fr/carnet/markdown.html},
  urldate = {2023-04-17},
  abstract = {Un guide simple pour apprendre Markdown.},
  langid = {french},
  organization = {flus},
  keywords = {cmu,documentation,markdown,non lu},
  file = {/home/igor/Zotero/storage/96VGWVYV/markdown.html},
}
% Journal article (PubMed record, PMID stored in eprint): Grayson, Hilliker and Wares 2022, R Markdown as a teaching interface.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{graysonMarkdownDynamicInterface2022,
  title = {R {{Markdown}} as a Dynamic Interface for Teaching: {{Modules}} from Math and Biology Classrooms},
  author = {Grayson, Kristine L. and Hilliker, Angela K. and Wares, Joanna R.},
  date = {2022-07},
  journaltitle = {Mathematical biosciences},
  shortjournal = {Math Biosci},
  volume = {349},
  eprint = {35623397},
  eprinttype = {pmid},
  pages = {108844},
  location = {United States},
  issn = {1879-3134 0025-5564},
  doi = {10.1016/j.mbs.2022.108844},
  abstract = {Advancing technologies, including interactive tools, are changing classroom pedagogy across academia. Here, we discuss the R Markdown interface, which allows for the creation of partial or complete interactive classroom modules for courses using the R programming language. R Markdown files mix sections of R code with formatted text, including LaTeX, which are rendered together to form complete reports and documents. These features allow instructors to create classroom modules that guide students through concepts, while providing areas for coding and text response by students. Students can also learn to create their own reports for more independent assignments. After presenting the features and uses of R Markdown to enhance teaching and learning, we present examples of materials from two courses. In a Computational Modeling course for math students, we used R Markdown to guide students through exploring mathematical models to understand the principle of herd immunity. In a Data Visualization and Communication course for biology students, we used R Markdown for teaching the fundamentals of R programming and graphing, and for students to learn to create reproducible data investigations. Through these examples, we demonstrate the benefits of R Markdown as a dynamic teaching and learning tool.},
  langid = {english},
  pmcid = {PMC9487201},
  keywords = {*Learning,*Students,Biology/education,cmu,Data visualization,Herd immunity,Humans,markdown,non lu,Pedagogy,pubmed,R markdown,Teaching programming},
  file = {/home/igor/Zotero/storage/JQGHBHB5/Grayson et al_2022_R Markdown as a dynamic interface for teaching.pdf},
}
% Web page: John Gruber's Markdown project page on Daring Fireball (dated 2004-12-17).
@online{gruberMarkdown2004,
  author = {Gruber, John},
  title = {Markdown},
  type = {Site personnel},
  organization = {Daring Fireball},
  date = {2004-12-17},
  langid = {english},
  url = {https://daringfireball.net/projects/markdown/},
  urldate = {2023-04-17},
  keywords = {cmu,documentation,markdown,non lu},
  file = {/home/igor/Zotero/storage/J7WP5B3D/markdown.html},
}
% Journal article (PubMed record, PMID stored in eprint): Hershberg and co-authors 2021, the JBrowseR package.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{hershbergJBrowseRInterfaceJBrowse2021,
  title = {{{JBrowseR}}: An {{R}} Interface to the {{JBrowse}} 2 Genome Browser},
  author = {Hershberg, Elliot A. and Stevens, Garrett and Diesh, Colin and Xie, Peter and De Jesus Martinez, Teresa and Buels, Robert and Stein, Lincoln and Holmes, Ian},
  date = {2021-11-05},
  journaltitle = {Bioinformatics (Oxford, England)},
  shortjournal = {Bioinformatics},
  volume = {37},
  number = {21},
  eprint = {34196689},
  eprinttype = {pmid},
  pages = {3914--3915},
  location = {England},
  issn = {1367-4811 1367-4803},
  doi = {10.1093/bioinformatics/btab459},
  abstract = {MOTIVATION: Genome browsers are an essential tool in genome analysis. Modern genome browsers enable complex and interactive visualization of a wide variety of genomic data modalities. While such browsers are very powerful, they can be challenging to configure and program for bioinformaticians lacking expertise in web development. RESULTS: We have developed an R package that provides an interface to the JBrowse 2 genome browser. The package can be used to configure and customize the browser entirely with R code. The browser can be deployed from the R console, or embedded in Shiny applications or R Markdown documents. AVAILABILITY AND IMPLEMENTATION: JBrowseR is available for download from CRAN, and the source code is openly available from the Github repository at https://github.com/GMOD/JBrowseR/.},
  langid = {english},
  pmcid = {PMC8570803},
  keywords = {*Genome,*Genomics,cmu,markdown,non lu,pubmed,R markdown,Software},
  file = {/home/igor/Zotero/storage/LYEIF748/Hershberg et al_2021_JBrowseR.pdf},
}
% Journal article (PubMed record, PMID stored in eprint): Jagla and co-authors 2021, the SCHNAPPs Shiny application for scRNAseq analysis.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{jaglaSCHNAPPsSingleCell2021,
  title = {{{SCHNAPPs}} - {{Single Cell sHiNy APPlication}}(s)},
  author = {Jagla, Bernd and Libri, Valentina and Chica, Claudia and Rouilly, Vincent and Mella, Sebastien and Puceat, Michel and Hasan, Milena},
  date = {2021-12},
  journaltitle = {Journal of immunological methods},
  shortjournal = {J Immunol Methods},
  volume = {499},
  eprint = {34742775},
  eprinttype = {pmid},
  pages = {113176},
  location = {Netherlands},
  issn = {1872-7905 0022-1759},
  doi = {10.1016/j.jim.2021.113176},
  abstract = {Single-cell RNA-sequencing (scRNAseq) experiments are becoming a standard tool for bench-scientists to explore the cellular diversity present in all tissues. Data produced by scRNAseq is technically complex and requires analytical workflows that are an active field of bioinformatics research, whereas a wealth of biological background knowledge is needed to guide the investigation. Thus, there is an increasing need to develop applications geared towards bench-scientists to help them abstract the technical challenges of the analysis so that they can focus on the science at play. It is also expected that such applications should support closer collaboration between bioinformaticians and bench-scientists by providing reproducible science tools. We present SCHNAPPs, a Graphical User Interface (GUI), designed to enable bench-scientists to autonomously explore and interpret scRNAseq data and associated annotations. The R/Shiny-based application allows following different steps of scRNAseq analysis workflows from Seurat or Scran packages: performing quality control on cells and genes, normalizing the expression matrix, integrating different samples, dimension reduction, clustering, and differential gene expression analysis. Visualization tools for exploring each step of the process include violin plots, 2D projections, Box-plots, alluvial plots, and histograms. An R-markdown report can be generated that tracks modifications and selected visualizations. The modular design of the tool allows it to easily integrate new visualizations and analyses by bioinformaticians. We illustrate the main features of the tool by applying it to the characterization of T cells in a scRNAseq and Cellular Indexing of Transcriptomes and Epitopes by Sequencing (CITE-Seq) experiment of two healthy individuals.},
  langid = {english},
  keywords = {*Sequence Analysis RNA,*Single-Cell Analysis,*Software,CITE-Seq,cmu,Humans,Leukocytes Mononuclear/*cytology/immunology,markdown,multi-omics data analysis,non lu,pubmed,R markdown,scRNA-seq,Shiny application},
  file = {/home/igor/Zotero/storage/PEARJ76P/Jagla et al_2021_SCHNAPPs - Single Cell sHiNy APPlication(s).pdf},
}
% Journal article (PubMed record, PMID stored in eprint): Kariyawasam and co-authors 2021, Glimma 2.0 interactive plots.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{kariyawasamDashboardstyleInteractivePlots2021,
  title = {Dashboard-Style Interactive Plots for {{RNA-seq}} Analysis Are {{R Markdown}} Ready with {{Glimma}} 2.0},
  author = {Kariyawasam, Hasaru and Su, Shian and Voogd, Oliver and Ritchie, Matthew E. and Law, Charity W.},
  date = {2021-12},
  journaltitle = {NAR genomics and bioinformatics},
  shortjournal = {NAR Genom Bioinform},
  volume = {3},
  number = {4},
  eprint = {34988439},
  eprinttype = {pmid},
  pages = {lqab116},
  location = {England},
  issn = {2631-9268},
  doi = {10.1093/nargab/lqab116},
  abstract = {Glimma 1.0 introduced intuitive, point-and-click interactive graphics for differential gene expression analysis. Here, we present a major update to Glimma that brings improved interactivity and reproducibility using high-level visualization frameworks for R and JavaScript. Glimma 2.0 plots are now readily embeddable in R Markdown, thus allowing users to create reproducible reports containing interactive graphics. The revamped multidimensional scaling plot features dashboard-style controls allowing the user to dynamically change the colour, shape and size of sample points according to different experimental conditions. Interactivity was enhanced in the MA-style plot for comparing differences to average expression, which now supports selecting multiple genes, export options to PNG, SVG or CSV formats and includes a new volcano plot function. Feature-rich and user-friendly, Glimma makes exploring data for gene expression analysis more accessible and intuitive and is available on Bioconductor and GitHub.},
  langid = {english},
  pmcid = {PMC8693569},
  keywords = {cmu,markdown,non lu,pubmed,R markdown},
  file = {/home/igor/Zotero/storage/ZQD2WXNN/Kariyawasam et al_2021_Dashboard-style interactive plots for RNA-seq analysis are R Markdown ready.pdf},
}
% Journal article: Ovadia 2014, Markdown for librarians and academics (Behavioral and Social Sciences Librarian 33(2)).
@article{ovadiaMarkdownLibrariansAcademics2014a,
  author = {Ovadia, Steven},
  title = {Markdown for {{Librarians}} and {{Academics}}},
  journaltitle = {Behavioral \& Social Sciences Librarian},
  shortjournal = {Behavioral \& Social Sciences Librarian},
  date = {2014-04-03},
  volume = {33},
  number = {2},
  pages = {120--124},
  issn = {0163-9269, 1544-4546},
  doi = {10.1080/01639269.2014.904696},
  url = {http://www.tandfonline.com/doi/abs/10.1080/01639269.2014.904696},
  urldate = {2023-05-02},
  langid = {english},
  keywords = {cmu,markdown,non lu,Publication scientifique},
  file = {/home/igor/Zotero/storage/25MP4P3N/Ovadia_2014_Markdown for Librarians and Academics.pdf},
}
% Web page: the "About" page of the Paged.js project (undated; access date in urldate).
% The project name is brace-protected as a whole word in the title so styles cannot recase it,
% and the organisational author is double-braced to be treated as one indivisible name.
@online{paged.jsPagedJs,
  title = {About {{Paged.js}}?},
  author = {{Paged.js}},
  url = {https://pagedjs.org/about/},
  urldate = {2024-03-08},
  langid = {english},
  organization = {Paged.js},
  keywords = {édition numérique,non lu,publication numérique},
}
% Mastodon post from the pandoc account pointing to Tom Pollard's Markdown thesis template.
% The account name is stored as a single braced (organisation-style) author; the handle stays visible in url.
% The title holds only the post text; the complete link is kept in the abstract.
@online{pandocWritingThesisThinking2023,
  type = {Mastodon post},
  title = {Writing a \#thesis? {{Thinking}} about Doing It in \#{{Markdown}}? {{No}} Problem, {{Tom Pollard}} Has You Covered},
  shorttitle = {Writing a \#thesis?},
  author = {{pandoc}},
  date = {2023-04-12},
  url = {https://fosstodon.org/@pandoc/110184589069297715},
  urldate = {2023-04-17},
  abstract = {Writing a \#thesis? Thinking about doing it in \#Markdown? No problem, Tom Pollard has you covered. https://github.com/tompollard/phd\_thesis\_markdown},
  langid = {english},
  organization = {Mastodon},
  keywords = {cmu,fediverse,markdown,mastodon,non lu,pandoc,Publication scientifique},
  file = {/home/igor/Zotero/storage/T354LYSW/110184589069297715.html},
}
% Web page: Arthur Perret's course page on the plain-text format (2022), with a quoted excerpt in note.
% langid uses the language name "french", matching the other French entries that already use it.
% NOTE(review): the quotation in note reads "ne contient des caractères"; a word ("que") may be missing - confirm against the source page.
@online{perretFormatTexte2022,
  type = {Site personnel},
  title = {Format texte},
  author = {Perret, Arthur},
  date = {2022-12-11},
  url = {https://www.arthurperret.fr/cours/format-texte.html},
  urldate = {2023-04-18},
  abstract = {Cette page explique ce qu'est le format texte et donne des arguments en faveur de son utilisation. Elle ouvre vers d'autres ressources pour explorer l'écosystème du format texte.},
  langid = {french},
  organization = {Arthur Perret},
  keywords = {cmu,markdown,non lu,plain text},
  note = {\begin{quotation}
\par
[C']est un fichier qui ne contient des caractères. (Perret, 2022)
\par
\end{quotation}},
  file = {/home/igor/Zotero/storage/3IM8K4DU/format-texte.html},
}
% Web page: Arthur Perret's course page on Markdown (2022).
@online{perretMarkdown2022,
  type = {Site personnel},
  title = {Markdown},
  author = {Perret, Arthur},
  date = {2022-12-11},
  publisher = {arthurperret.fr/},
  url = {https://www.arthurperret.fr/cours/markdown.html},
  urldate = {2023-04-17},
  abstract = {Cette page explique ce qu'est Markdown, le plus populaire des langages de balisage léger, et renvoie vers un tutoriel interactif en français.},
  langid = {french},
  organization = {Arthur Perret},
  keywords = {cmu,documentation,markdown,non lu},
  file = {/home/igor/Zotero/storage/HZNSDQ7S/markdown.html},
}
% Software repository: Tom Pollard's template for writing a PhD thesis in Markdown (GitHub).
@software{pollardTemplateWritingPhD2023,
  author = {Pollard, Tom},
  title = {Template for Writing a {{PhD}} Thesis in {{Markdown}}},
  origdate = {2015-02-10T10:32:25Z},
  date = {2023-04-17T04:40:26Z},
  abstract = {Template for writing a PhD thesis in Markdown},
  url = {https://github.com/tompollard/phd_thesis_markdown},
  urldate = {2023-04-17},
  keywords = {cmu,markdown,non lu,pandoc,Publication scientifique,thesis-template},
}
% Journal article (PubMed record, PMID stored in eprint): Schneider, Backfisch and Lachner 2022, the preregRS preregistration template.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{schneiderFacilitatingOpenScience2022,
  title = {Facilitating Open Science Practices for Research Syntheses: {{PreregRS}} Guides Preregistration},
  author = {Schneider, Jürgen and Backfisch, Iris and Lachner, Andreas},
  date = {2022-03},
  journaltitle = {Research synthesis methods},
  shortjournal = {Res Synth Methods},
  volume = {13},
  number = {2},
  eprint = {34921744},
  eprinttype = {pmid},
  pages = {284--289},
  location = {England},
  issn = {1759-2887 1759-2879},
  doi = {10.1002/jrsm.1540},
  abstract = {Researchers increasingly engage in adopting open science practices in the field of research syntheses, such as preregistration. Preregistration is a central open science practice in empirical research to enhance transparency in the research process and it gains steady adoption in the context of conducting research synthesis. From an interdisciplinary perspective, frameworks and particularly templates are lacking which support researchers preparing a preregistration. To this end, we introduce preregRS, a template to guide researchers across disciplines through the process of preregistering research syntheses. We utilized an R Markdown template file to provide a framework that structures the process of preparing a preregistration. Researchers can write up the preregistration using the template file similar to filling out a form, with the template providing additional hints and further information for the decisions along the framework. We integrated the R Markdown template in an R package for easy installation and use, but also provide a browser-based option for users granting low-barrier access. PreregRS constitutes a first step to facilitate and support preregistration with research syntheses for all disciplines. It further adds to establishing open science practices in conducting research syntheses.},
  langid = {english},
  keywords = {*Surveys and Questionnaires,cmu,Empirical Research,markdown,non lu,open science,preregistration,pubmed,R markdown,R package},
}
% Journal article (PubMed record, PMID stored in eprint): Yu and Kumbier 2020, the PCS framework for veridical data science.
% The title carries no trailing period; terminal punctuation is supplied by the citation style.
@article{yuVeridicalDataScience2020,
  title = {Veridical Data Science},
  author = {Yu, Bin and Kumbier, Karl},
  date = {2020-02-25},
  journaltitle = {Proceedings of the National Academy of Sciences of the United States of America},
  shortjournal = {Proc Natl Acad Sci U S A},
  volume = {117},
  number = {8},
  eprint = {32054788},
  eprinttype = {pmid},
  pages = {3920--3929},
  location = {United States},
  issn = {1091-6490 0027-8424},
  doi = {10.1073/pnas.1901326117},
  abstract = {Building and expanding on principles of statistics, machine learning, and scientific inquiry, we propose the predictability, computability, and stability (PCS) framework for veridical data science. Our framework, composed of both a workflow and documentation, aims to provide responsible, reliable, reproducible, and transparent results across the data science life cycle. The PCS workflow uses predictability as a reality check and considers the importance of computation in data collection/storage and algorithm design. It augments predictability and computability with an overarching stability principle. Stability expands on statistical uncertainty considerations to assess how human judgment calls impact data results through data and model/algorithm perturbations. As part of the PCS workflow, we develop PCS inference procedures, namely PCS perturbation intervals and PCS hypothesis testing, to investigate the stability of data results relative to problem formulation, data cleaning, modeling decisions, and interpretations. We illustrate PCS inference through neuroscience and genomics projects of our own and others. Moreover, we demonstrate its favorable performance over existing methods in terms of receiver operating characteristic (ROC) curves in high-dimensional, sparse linear model simulations, including a wide range of misspecified models. Finally, we propose PCS documentation based on R Markdown or Jupyter Notebook, with publicly available, reproducible codes and narratives to back up human choices made throughout an analysis. The PCS workflow and documentation are demonstrated in a genomics case study available on Zenodo.},
  langid = {english},
  pmcid = {PMC7049126},
  keywords = {cmu,computation,data science,markdown,non lu,prediction,pubmed,R markdown,stability},
  file = {/home/igor/Zotero/storage/GWVCYDA6/Yu_Kumbier_2020_Veridical data science.pdf},
}