-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.R
570 lines (515 loc) · 20.9 KB
/
app.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
## app.R ##
# samplesheet-generator shiny app
library(shiny)
library(shinydashboard)
library(shinythemes)
library(shinyWidgets) #remotes::install_github("dreamRs/shinyWidgets")
library(shinyjs)
library(shinyalert)
library(shinypop) # remotes::install_github("dreamRs/shinypop")
library(data.table)
library(dplyr)
library(stringr)
library(kableExtra)
####
# setup global variables
####
# ------------------------------------------------------------------comment this out before deployment
commit_hash <- system("git rev-parse --short HEAD", intern = TRUE)
# ------------------------------------------------------------------comment this out before deployment
indexkitslist <- list(
"Illumina" = list(
"IDT for Illumina DNA UD Indexes, Tagmentation" = "idt-udp",
"IDT for Illumina DNA/RNA UD Indexes, Tagmentation, ver2" = "idt-udp-ver2",
"Nextera DNA CD Indexes (96 Indexes, 96 Samples)" = "nextera-dna-cd96",
"Nextera XT v2 CD" = "nextera-xt-v2"),
"NEB" = list(
"NEBNext Multiplex Oligos for Illumina (96 Unique Dual Index Primer Pairs)" = "neb"),
"Zymo" = list(
"Zymo-Seq UDI Primer Set" = "zymo")
)
machines <- list(
"forward strand workflow" = list(
"MiSeq" = "miseq", "NextSeq 2000" = "miseq", "HiSeq 2000/2500" = "miseq", "NovaSeq v1.0 reagents" = "miseq"),
"reverse complement workflow"= list(
"iSeq" = "nextseq", "MiniSeq" = "nextseq", "NextSeq 500/550" = "nextseq", "HiSeq 3000/4000/X" = "nextseq", "NovaSeq v1.5 reagents" = "nextseq")
)
# load data and make it available for all sessions
indexcsv <- fread("indexdata/indexcsv.csv")
sh_colnames <- c("Sample_ID", "Index_Plate_Well", "Index_Plate",
"I7_Index_ID", "index", "I5_Index_ID", "index2",
"Sample_Project", "Description")
demo_data <- "#Demo data, delete it and paste yours#\nsample01\tA01\tA\nsample02\tA02\tA\nsample03\tA01\tB"
#####
ui <- fluidPage(
# include css needed for shinydashboard to work. I am NOT putting everything in dashboardPage and they are NOT loaded
includeCSS("css/AdminLTE.css"),
includeCSS("css/shinydashboard.css"),
useShinyjs(),
useShinyalert(),
use_notiflix_notify(position = "right-bottom", width = "480px", timeout = 4000),
use_notiflix_report(cssAnimationDuration = 100, width = "100%", messageMaxLength = 1800),
#use_notie(),
theme = shinytheme("cosmo"),
titlePanel("Illumina Samplesheet Generator",
windowTitle = "Illumina Samplesheet Generator"),
fluidRow(
column(2,
tags$caption(
# ),
#column(2,
actionBttn("supportedkits", label = "Supported kits",
size = "lg", style = "fill", color = "default")
)),
column(10,
valueBoxOutput("samples_pasted", width = 4),
#valueBoxOutput("samples_matched", width = 3),
valueBoxOutput("samples_valid", width = 4),
valueBoxOutput("samples_clashed", width = 4)
)
),
#tags$h5("This tool generates Illumina sequencing sample sheets (double indexing only , UDI and CD)."),
tags$hr(),
tabsetPanel(
#well = F, fluid = F,
#widths = c(2,10),
tabPanel(
"1. Paste sample-well data",
tags$h4("Paste sample-well-set mapping and press the 'Read data' button"),
actionBttn("read", "Read data", style = "fill", color = "default", icon = icon("play")),
actionBttn("reset", "Reset", style = "fill", color = "default", icon = icon("stop")),
tags$h5(
"You can paste a 3-column data from excel, for example 'sample01; A01; A' or 'sample01 A1 B'. The
pasted data must have 3 columns (any separator) and at least 1 new line. No empty lines are allowed."
),
fluidRow(
column(6,
textAreaInput(
"csv",
value = demo_data,
label = "Paste data here",
height = '300px', width = '100%'
)
),
column(6,
tableOutput("csvread")
)
)
),
tabPanel(
"2. Select index kit and machine",
tags$h4("Select index kit, index set and machine"),
fluidRow(
column(
6,
pickerInput(
"indexkit",
choices = indexkitslist,
multiple = FALSE,
width = "100%",
label = "Select Index Kit",
inline = FALSE
)
),
column(
2,
pickerInput(
"set",
choices = c("A", "B", "C", "D"),
selected = "A",
multiple = TRUE,
width = "100%",
label = "Select Set(s)",
inline = FALSE
)
),
column(
4,
pickerInput(
"machine",
choices = machines,
multiple = FALSE,
width = "80%",
label = "Select machine",
inline = FALSE
)
)
),
#tags$hr(),
tags$br(),
tags$hr(),
h4("Sample sheet preview"),
tableOutput("shPreview1")
),
tabPanel(
"3. Add run details and get samplesheet",
tags$h4("Fill in optional fileds"),
tags$h6(
"For running bcl2fastq these fields are optional, but may be needed if the sample sheet is needed for other software.
The 'Read1' and 'Read2' fields are required only if a sample sheet file is used to set up a sequencing run through the MiSeq Control Software."
),
# inputs for Header
fluidRow(column(
3,
# airDatePicker gives some strange bugs in chrome
dateInput(inputId = "date", "Date", value = Sys.Date())
), column(
3,
textInput(inputId = "investigator", "Investigator"),
), column(
3,
textInput("description", "Description")
), column(
1,
numericInput("read1", label = "Read1", min = 51, max = 301, step = 1, value = NULL, width = "100%")
), column(
1,
numericInput("read2", label = "Read2", min = 51, max = 301, step = 1, value = NULL, width = "100%")
)),
fluidRow(column(
3,
prettyCheckbox("trimming", "Adapter trimming",
value = FALSE,
status = "info", fill = TRUE)
)),
tags$hr(),
tags$h4("Download samplesheet or sample-index mapping"),
tags$h6(
"Download complete sample sheet or just the sample-index mapping, to be inserted after the [DATA] field in a sample sheet.
A preview of the sample sheet is shown below."
),
downloadBttn(
"download1",
style = "fill", color = "default",
label = "Download sample sheet",
size = "sm"
),
downloadBttn(
"download2",
style = "fill", color = "default",
label = "Download sample-index mapping",
size = "sm"
),
# preview complete sample sheet
verbatimTextOutput("shPreview2")
)
)
)
server <- function(input, output, session) {
# ------------------------------------------------------------- data input handling
# setup reactives for
# pasted data and indexkits data
values <- reactiveValues(csv_data = NULL,
samples_pasted = 0, # number of samples in pasted data
#samples_matched = 0, # number of samples with matched index well
samples_valid = 0, # matched with valid sample_id
samples_clashed = 0, # tracks if indexes are unique in sample sheet
sample_id_valid = TRUE,
sample_id_duplicate = TRUE,
index_well_valid = TRUE)
# these are the reactives for the sample sheet sections apart from [DATA]
sh_values <- reactiveValues(date = NULL,
investigator = NULL,
description = NULL,
trimm_seq1 = NULL,
trimm_seq2 = NULL,
read1 = NULL,
read2 = NULL)
# ------------------------------------------------------------- read in pasted data
observeEvent(input$read, {
if(input$csv != '') {
values$csv_data <- try(
fread(text = input$csv, header = FALSE, col.names = c("Sample_ID", "Index_Plate_Well", "Index_Plate")) %>%
# to capture cases where set 1,2... or a,b... is used instead of set A, B...
mutate(Index_Plate = case_when(
Index_Plate == 1 | Index_Plate == "a" ~ "A",
Index_Plate == 2 | Index_Plate == "b" ~ "B",
Index_Plate == 3 | Index_Plate == "c" ~ "C",
Index_Plate == 4 | Index_Plate == "d" ~ "D",
TRUE ~ as.character(Index_Plate)
),
Index_Plate_Well = ifelse(str_length(Index_Plate_Well) == 2,
yes = str_replace(Index_Plate_Well, "([1-9])", "0\\1"),
no = Index_Plate_Well) %>% toupper()
)
)
# solution to insert leading zeros for 1-9
# ifelse(str_length(chr) == 2, yes = str_replace(chr, "([1-9])", "0\\1"), no = chr) # \1 is the capture group
values$samples_pasted <- nrow(values$csv_data)
# check for valid index plate well names, check for valid Sample_ID names
# now this is a logical vector, to store positions of rows that do match pattern
values$index_well_valid <- str_detect(values$csv_data$Index_Plate_Well, "^[A-H]0[0-9]$|^[A-H]1[0-2]$")
# The field for the Sample_ID column has special character restrictions
#as only alphanumeric (ASCII codes 48-57, 65- 90, and 97-122),
#dash (ASCII code 45), and underscore (ASCII code 95) are permitted.
#The Sample_ID length is limited to 100 characters maximum
# this gets complicated: first get a logical vector with valid names, then AND for duplicated names so that
# only valid names which are not duplicated remain
values$sample_id_valid <- str_detect(values$csv_data$Sample_ID, "^[-_0-9A-Za-z]{2,100}$")
values$sample_id_duplicate <- !duplicated(values$csv_data$Sample_ID)
} else {
nx_notify_error("Paste something first!")
}
}
)
#-----------------------------------------------------------REACTIVES
# --------------------------------------------------------- filter index data
indexdata <- reactive({
validate(need(input$set, message = "Set is required!"))
indexcsv %>% dplyr::filter(indexkit == input$indexkit, set %in% input$set, workflow == input$machine)
})
#------------------------------------------------------------ join
joindata <- reactive({
validate(need(values$csv_data, "No samples data pasted"))
values$csv_data[values$sample_id_valid & values$sample_id_duplicate, ] %>%
inner_join(indexdata(), by = c("Index_Plate_Well" = "Index_Plate_Well", "Index_Plate" = "Index_Plate")) %>%
as.data.table() # make sure it is not something else after join
})
#------------------------------------------------------------ header of sample sheet
sh <- reactive({
c(
"[Header]",
mapply(
paste,
list("Date", "Investigator", "Description", "Workflow"),
list(sh_values$date, sh_values$investigator, sh_values$description,"GenerateFASTQ"),
MoreArgs = list(sep = ",")
),
"", #empty lines are ignored - here just for clarity
"[AppInfo]",
paste("name", "samplesheet-generator", sep = ","),
paste("author", "Angel Angelov", sep = ","),
paste("git_commit", commit_hash, sep = ","),
# this app must be under git control!
paste("executed_on", Sys.time(), sep = ","),
paste("indexkit_used", input$indexkit, sep = ","),
paste("workflow_used", input$machine, sep = ","),
"",
"[Settings]",
paste("Adapter", sh_values$trimm_seq1, sep = ","),
paste("AdapterRead2", sh_values$trimm_seq2, sep = ","),
"",
"[Reads]",
# only required when using a sample sheet file to set up a sequencing run through the MiSeq® Control Software.
sh_values$read1,
sh_values$read2,
"",
"[Data]"
)
})
# observe index clashes and number of matched and clashed samples
observe({
# both indexes are not unique
if ( length(unique(joindata()$index_check)) < length(joindata()$index_check) ) {
#values$samples_matched <- nrow( joindata() )
values$samples_clashed <- length(joindata()$index_check) - length(unique(joindata()$index_check))
nx_notify_error("Two or more samples (highlighted in red) have the same indexes! Please check your input.")
# i7 or i5 is not unique, i.e. CD indexing schemes
} else if( length(unique(joindata()$index)) < length(joindata()$index) ) {
#values$samples_matched <- nrow( joindata() )
nx_report_warning("Warning!",
message = "Two or more samples have the same i7 or i5 index.\n
This is OK if you are using combinatorial dual indexing scheme, but consider using UDIs!")
} else {
#values$samples_matched <- nrow( joindata() )
values$samples_clashed <- 0
}
})
# separate observer for valid index well and sample id names
observe({
if( !all(values$index_well_valid ) ) {
nx_notify_warning("Index_Plate_Well name is not valid! Only A01 to H12 are accepted")
values$samples_valid <- nrow( joindata() )
}
if( !all(values$sample_id_valid) ){
nx_notify_warning("Sample_ID name is not valid! Only '0-9', 'A-Z', 'a-z', '-' and '_' allowed")
values$samples_valid <- nrow( joindata() )
}
if( !all(values$sample_id_duplicate) ) {
nx_notify_warning("Sample_ID names must be unique!")
values$samples_valid <- nrow( joindata() )
} else {
values$samples_valid <- nrow( joindata() ) - values$samples_clashed
}
})
# various other OBSERVERS
#-------------------------------- list supported kits
observeEvent(input$supportedkits, {
nx_report_info("Supported indexing kits",
message = tags$p(
style = "text-align: left;",
tags$ul(tags$li(
tags$a(href = "https://emea.support.illumina.com/bulletins/2020/06/illumina-library-prep-kits-and-associated-index-kits.html",
"IDT for Illumina DNA UD Indexes, Tagmentation, Sets A-D, Cat.# 20027213, 20027214, 20027215, 20027216",
target = "_blank")),
tags$li(
tags$a(href = "https://emea.support.illumina.com/bulletins/2020/06/illumina-library-prep-kits-and-associated-index-kits.html",
"IDT for Illumina DNA/RNA UD Indexes, Tagmentation, Sets A-D ver2, Cat.# 20027213, 20027214, 20042666, 20742667",
target = "_blank")),
tags$li(
tags$a(href = "https://emea.support.illumina.com/bulletins/2020/06/illumina-library-prep-kits-and-associated-index-kits.html",
"Nextera DNA CD Indexes (96 Indexes, 96 Samples), Cat.# 20018708",
target = "_blank")),
tags$li(
tags$a(href = "https://emea.support.illumina.com/bulletins/2020/06/illumina-library-prep-kits-and-associated-index-kits.html",
"Nextera XT Index Kit v2 Set A-D, Cat.# FC‑131‑2001, FC‑131‑2002, FC‑131‑2003, FC‑131‑2004",
target = "_blank")),
tags$li(
tags$a(
href = "https://international.neb.com/tools-and-resources/selection-charts/nebnext-multiplex-oligos-selection-chart",
"NEBNext Multiplex Oligos for Illumina (96 Unique Dual Index Primer Pairs), Sets 1-4, Cat.# E6440, E6442, E6444, E6446",
target = "_blank")),
tags$li(tags$a(
href = "https://www.zymoresearch.de/products/zymo-seq-udi-primer-sets",
"Zymo-Seq UDI Primer Set, set A, Cat.# D3096", target = "_blank"))
),
tags$a(href = "https://github.com/angelovangel/samplesheet-generator/issues/new?labels=new_kit&title=New+index+kit+request",
"Request to include a new kit by opening an issue on GitHub",
target = "_blank")
)
)
})
# -------------------------------observer to update set input based on kit selected
# for kits with one set only
observe({
if(input$indexkit == "zymo" | input$indexkit == "nextera-dna-cd96") {
updatePickerInput(session = session,
inputId = "set",
choices = c("A"),
selected = "A")
} else {
updatePickerInput(session = session,
inputId = "set",
choices = c("A", "B", "C", "D"),
selected = "A")
}
})
#---------------------------------------------------------observer to update sh_values
observe({
sh_values$date <- input$date
sh_values$investigator <- input$investigator
sh_values$description <- input$description
# assign trimm_seq depending on kit selected, and depending if input$trimming is selected
if( (input$indexkit == "idt-udp" | input$indexkit == "idt-udp-ver2" | input$indexkit == "nextera-dna-cd96") & input$trimming ) {
sh_values$trimm_seq1 <- "CTGTCTCTTATACACATCT"
# neb and zymo have TruSeq adapters
} else if( (input$indexkit == "neb" | input$indexkit == "zymo") & input$trimming ) {
sh_values$trimm_seq1 <- "AGATCGGAAGAGCACACGTCTGAACTCCAGTCA"
sh_values$trimm_seq2 <- "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT"
} else {
# this may seem unnecessary, but I need NULL, not NA, which happens if directly input$trimm_seq1 is used
sh_values$trimm_seq1 <- NULL
sh_values$trimm_seq2 <- NULL
}
# this may seem unnecessary, but I need NULL, not NA
if( !is.na(input$read1) | !is.na(input$read2) ) {
sh_values$read1 <- input$read1
sh_values$read2 <- input$read2
} else {
sh_values$read1 <- NULL
sh_values$read2 <- NULL
}
})
# ------------------------------ ---------------------------RENDER OUTPUTS
output$csvread <- function(){
validate(
need(values$csv_data, message = "Preview of pasted data")
)
# check for samples with the same well and same set
dups <- values$csv_data[ , c("Index_Plate_Well", "Index_Plate")]
dups_indexes <- which( duplicated(dups) | duplicated(dups, fromLast = T) )
kableExtra::kable(values$csv_data) %>%
kable_styling(fixed_thead = TRUE,
bootstrap_options = c("hover")) %>%
row_spec(c(dups_indexes), color = "white", background = "#D7261E") %>%
# invalid index well names
row_spec( which(
!values$index_well_valid
#!str_detect(values$csv_data$Index_Plate_Well, "^[A-H][0-1][0-9]$")
), color = "white", background = "#F1C40F" ) %>%
# invalid sample id names
row_spec( which(
!values$sample_id_valid
#!str_detect(values$csv_data$Sample_ID, "^[-_0-9A-Za-z]{2,100}$")
), color = "white", background = "#F1C40F") %>%
# duplicate sample names
row_spec( which(
!values$sample_id_duplicate
), color = "white", background = "#F1C40F")
}
#---------------------------------------------------------preview data
output$shPreview1 <- function(){
# first find duplicate indexes to mark them in kable
dups <- joindata()[ , "index_check"]
dups_indexes <- which( duplicated(dups) | duplicated(dups, fromLast = T) )
kableExtra::kable( joindata()[, ..sh_colnames], "html") %>% # for ..sh_colnames --> Perhaps you intended DT[, ..sh_colnames]. This difference to data.frame is deliberate and explained in FAQ 1.1.
kable_styling(fixed_thead = TRUE,
bootstrap_options = c("hover")) %>%
row_spec(c(dups_indexes), color = "white", background = "#D7261E")
}
#---------------------------------------------------------preview sample sheet header
output$shPreview2 <- renderPrint({
write( sh(), file = "");
write.table(joindata()[, ..sh_colnames],
file = "",
append = TRUE,
sep = ",",
quote = FALSE,
col.names = TRUE,
row.names = FALSE)
})
#---------------------------------------------------------value boxes renders
output$samples_pasted <- renderValueBox({
color <- ifelse(values$samples_pasted > 0, "green", "yellow")
valueBox(values$samples_pasted, "samples pasted", color = color)
})
# output$samples_matched <- renderValueBox({
# color <- ifelse(values$samples_matched == values$samples_pasted && values$samples_matched != 0,
# "green", "yellow")
# validate(need(indexdata(), message = "Set is required!"))
# valueBox(values$samples_matched, "samples with matched index", color = color)
# })
output$samples_valid <- renderValueBox({
color <- ifelse(values$samples_pasted == values$samples_valid && values$samples_valid != 0,
"green", "yellow")
validate(need(indexdata(), message = "Set is required!"))
valueBox(values$samples_valid, "valid samples", color = color)
})
output$samples_clashed <- renderValueBox({
color <- ifelse(values$samples_clashed == 0, "green", "red")
validate(need(indexdata(), message = "Set is required!"))
valueBox(values$samples_clashed, "samples with clash index", color = color)
})
#}
#})
# -------------------------------------------------------------TAB3 generate samplesheet and download
output$download1 <- downloadHandler(
filename = paste(Sys.Date(), "-samplesheet.csv", sep = ""),
content = function(file) {
write(sh(), file = file) #----------------------------------sh is constructed in REACTIVES above
write.table(joindata()[, ..sh_colnames],
file = file,
append = TRUE,
sep = ",",
quote = FALSE,
col.names = TRUE,
row.names = FALSE) # important!
#nx_report_error("Error!", "This feature is still in development")
}
)
output$download2 <- downloadHandler(
filename = paste(Sys.Date(), "-sample-index.csv", sep = ""),
content = function(file) {fwrite( joindata()[, ..sh_colnames], sep = ",", file = file )}
)
observeEvent(input$reset, {
shinyalert(title = "",
type = "warning",
text = "Restart app?",
html = TRUE,
confirmButtonText = "Start again",
showCancelButton = TRUE,
callbackJS = "function(x) { if (x == true) {history.go(0);} }" # restart app by reloading page
)
})
}
shinyApp(ui, server)