This is a static website and demo of a data catalog. Search for data objects in the metadata viewer, or visualize data product relationships in the directed acyclic graph (DAG) below.
NORTHWEST PATHOGEN GENOMICS CENTER OF EXCELLENCE
NORTHWEST PATHOGEN GENOMICS CENTER OF EXCELLENCE
Data Catalog
#| standalone: true
#| #components: [editor, viewer]
#| viewerHeight: 1000
#| column: page
## file: app.R
# import dag
library(shiny)
library(DiagrammeR)
library(dplyr)
library(tidyr)
# Read the CSV file when the script starts
metadata <- read.csv(url("https://raw.githubusercontent.com/NW-PaGe/dstt_catalog_demo/refs/heads/main/metadata.csv")) %>%
mutate(across(where(is.character), function(.) ifelse(. == '', NA_character_, .)))
# Define UI for the app
dag_ui <- fluidPage(
sidebarLayout(
sidebarPanel(
h4("Select nodes"),
checkboxGroupInput("selected_nodes", "Nodes", choices = unique(metadata$Product_Name), selected = unique(metadata$Product_Name)),
hr()
),
mainPanel(
grVizOutput("dag_plot", width = "100%", height = "600px")
)
)
)
# Define server logic for the app
dag_server <- function(input, output, session) {
# Create a reactive DAG from selected nodes
reactive_dag <- reactive({
### Create nodes based on inclusion in selected input
nodes_final <- metadata %>%
filter(Product_Name %in% input$selected_nodes) %>%
select(Product_Name) %>%
mutate(Product_Name_Copy = Product_Name) %>%
distinct()
### Create edges based on `Parent` and `Child` columns ----------------
# unnest any nodes with 2+ parents or children
unnested_data <- metadata %>%
select(Product_Name, Parent, Child) %>%
mutate(Product_Name_Copy = Product_Name) %>%
tidyr::separate_rows(Parent, sep = ";") %>%
tidyr::separate_rows(Child, sep = ";") %>%
distinct()
# identify all possible parents
parent_df <- unnested_data %>%
filter(!is.na(Parent)) %>%
mutate(Child = Product_Name)
# identify all possible children
child_df <- unnested_data %>%
filter(!is.na(Child)) %>%
mutate(Parent = Product_Name)
# join parents and children to get all possible edges
edges <- rbind(parent_df, child_df) %>%
select(Parent, Child) %>%
distinct()
# get direct edges (edges where no product has been deselected)
direct_edges <- edges %>%
filter(Parent %in% input$selected_nodes & Child %in% input$selected_nodes) %>%
mutate(style = 'solid') # set edge attribute so line will be solid
# get indirect edges (edges where child or parent has been deselected)
indirect_parent <- edges %>%
filter(!Parent %in% input$selected_nodes) %>%
rename(removed=Parent)
indirect_child <- edges %>%
filter(!Child %in% input$selected_nodes) %>%
rename(removed=Child)
indirect_edges <- indirect_parent %>%
left_join(indirect_child, by='removed') %>%
select(-removed) %>%
mutate(style = 'dashed') # set edge attribute so line will be dashed
# concat dfs for final edges df with direct/indirect relationships included
edges_final <- rbind(indirect_edges, direct_edges)
### Create the DAG using DiagrammeR ------------------------------------------
dag <- create_graph() %>%
add_nodes_from_table(table = nodes_final,
label_col = "Product_Name") %>%
add_edges_from_table(table = edges_final,
from_col = "Parent",
to_col = "Child",
from_to_map = "Product_Name_Copy")
return(dag)
})
observe({
# Render the DAG plot
output$dag_plot <- renderGrViz({
dag <- reactive_dag()
dag %>% render_graph()
})
})
}
# # render shinyapp
# shinyApp(ui=dag_ui, server=dag_server)
# import docx_to_html function
docx_to_html <- function(doc) {
#' Convert docx to html
#'
#' Return a semi formatted html version of a docx file. This function
#' accepts docx objects (output by officer::read_docx) and outputs an html-marked character element
#' with some headers and tables formatted. Media will not be included in output.
#'
#' @param doc docx object output from officer::read_docx
#' @seealso [officer::read_docx()]
#'
library(officer)
library(dplyr)
library(htmltools)
doc_content <- officer::docx_summary(doc) # convert doc to data frame with rows of docx elements
doc_content <- dplyr::arrange(doc_content, doc_index, row_id, cell_id) # arrange df by index, then row id, then cell id
html_content <- "" # Initialize html content element
is_table <- FALSE # Set flag to check if a table is being constructed
# Step through the document content rowwise to render paragraphs, headers, tables, and page breaks
for (i in 1:nrow(doc_content)) {
# close the row and table if the previous row was part of a table
if (is_table & !doc_content$content_type[i] %in% "table cell") {
html_content <- paste0(html_content, "</tr></table>") # Close row and table
is_table <- FALSE # reset to false
}
# if row is paragraph type ...
if (doc_content$content_type[i] %in% "paragraph") {
# insert a horizontal line if a page break is present
if (tolower(doc_content$text[i]) %in% 'page break') {
html_content <- paste0(html_content, htmltools::hr())
# otherwise, if a heading style, insert text with a heading tag (add +1 to heading # as we assign title as h1 later)
} else if (grepl("heading [1-5]", doc_content$style_name[i])) {
header = gsub("heading ([1-5])", "\\1", doc_content$style_name[i])
html_content <- paste0(
html_content,
switch(as.numeric(header)+1,
htmltools::h1(doc_content$text[i]),
htmltools::h2(doc_content$text[i]),
htmltools::h3(doc_content$text[i]),
htmltools::h4(doc_content$text[i]),
htmltools::h5(doc_content$text[i]),
htmltools::h6(doc_content$text[i])
)
)
# otherwise, if a title style, insert text with h1 tags
} else if (tolower(doc_content$style_name[i]) %in% "title") {
html_content <- paste0(html_content, htmltools::h1(doc_content$text[i]))
# otherwise, insert text with paragraph tags
} else {
html_content <- paste0(html_content, htmltools::p(doc_content$text[i]))
}
# otherwise (if not a paragraph type), if it is a table cell in row 1 cell 1 (meaning, the first cell of a table), insert the table border
} else if (doc_content$content_type[i] %in% "table cell" & doc_content$row_id[i]%in%1 & doc_content$cell_id[i]%in%1) {
html_content <- paste0(html_content, "<table border='1'>") # Start a table
is_table <- TRUE # set table flag to true
# otherwise, if table cell and the first cell of a new row, add a new row tag
} else if (doc_content$content_type[i] %in% "table cell" & doc_content$row_id[i] > doc_content$row_id[i-1]) {
html_content <- paste0(html_content, "</tr><tr>") # Close prev row and start a new row
# insert text if not paragraph of table cell w/o formatting
} else if (!doc_content$content_type[i] %in% c("paragraph", "table cell")) {
html_content <- paste0(html_content, doc_content$text[i])
}
# if a table cell, insert text with td tag
if (doc_content$content_type[i] %in% "table cell") {
html_content <- paste0(html_content, "<td>", doc_content$text[i], "</td>") # Add cell content
}
}
# convert character element to element marked as HTML
HTML(html_content)
}
# import split_values.R
library(stringr)
split_values <- function(x, sep=';', unlist=FALSE) {
#' split_values
#'
#' Function splits char elements inside char vectors and removes surrounding whitespace
#'
#' @param x character or vector of characters
#' @param sep string to split on; default ";"
#' @param unlist whether or not to unlist the character list; default FALSE
#' @return a list (or vector, if unlist=TRUE) of character elements
#'
#' @example split_values(x=c('this', 'string; is', 'split'))
#' @example split_values(x=c('This will be return as a vector'), sep=',', unlist=TRUE)
#'
x = stringr::str_split(x, sep) # split by sep and return string
x = sapply(x, trimws) # trim whitespace of strings in list
if (unlist) {
x = unlist(x)
}
return(x)
}
# import server & ui code
library(shiny)
library(DT)
library(officer)
library(duckdb)
library(jsonlite)
# Read the CSV file when the script starts
metadata <- read.csv(url("https://raw.githubusercontent.com/NW-PaGe/dstt_catalog_demo/refs/heads/main/metadata.csv")) %>%
mutate(across(where(is.character), function(.) ifelse(. == '', NA_character_, .)))
# Define the UI
metadata_ui <- fluidPage(
# Add custom CSS
tags$head(
tags$style(HTML("
.filter-tag {
display: inline-block;
margin: 3px;
padding: 6px 12px;
background-color: #3498db;
color: white;
border-radius: 15px;
cursor: pointer;
font-size: 14px;
transition: background-color 0.3s ease;
}
.filter-tag:hover {
background-color: #2980b9;
}
.filter-label {
font-weight: bold;
margin-right: 5px;
}
.filter-value {
font-weight: normal;
}
.remove-icon {
margin-left: 8px;
font-weight: bold;
opacity: 0.8;
}
.filter-tag:hover .remove-icon {
opacity: 1;
}
#fileListContent {
max-height: none;
overflow-y: hidden;
}
#fileListContent li:nth-child(n+7) {
display: none;
}
#fileListContent li:nth-child(6) {
white-space: nowrap;
overflow: visible;
text-overflow: ellipsis;
opacity: 0.7;
position: relative;
padding-right: 20px;
list-style-type: disc !important;
}
#fileListContent li:nth-child(6) a {
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
display: block;
}
#fileListContent li:nth-child(6)::after {
content: '';
position: absolute;
bottom: 0;
left: -20px;
right: 0;
height: 60%;
background: linear-gradient(to bottom, transparent, white);
pointer-events: none;
}
#fileListContent.expanded li {
display: list-item !important;
white-space: normal !important;
opacity: 1 !important;
}
#fileListContent.expanded li:nth-child(6)::after {
display: none;
}
#toggleBtn {
color: #3498db;
cursor: pointer;
border: none;
background: none;
padding: 5px 10px;
width: 100%;
text-align: center;
margin-top: 10px;
font-size: 16px;
}
.file-list-ul {
list-style-type: disc !important;
padding-left: 20px;
margin: 0;
}
.file-list-ul li {
list-style-type: disc !important;
}
")),
tags$script(HTML("
function toggleFileList() {
var content = document.getElementById('fileListContent');
var btn = document.getElementById('toggleBtn');
if (content.classList.contains('expanded')) {
content.classList.remove('expanded');
btn.innerHTML = '▾';
} else {
content.classList.add('expanded');
btn.innerHTML = '▴';
}
}
"))
),
sidebarLayout(
sidebarPanel(
# Static text inputs for certain columns
textInput("product_id", "Product ID", ""),
textInput("product_name", "Product Name", ""),
textInput("keywords", "Keywords", "", placeholder = "variant, genbank, lineage"),
# Static text inputs dynamically pulled from metadata.csv for certain columns
selectInput("location", "Location", choices = c("All", unique(split_values(metadata$Location, unlist=T))), selected = "All"),
selectInput("steward", "Steward", choices = c("All", unique(split_values(metadata$Steward, unlist=T))), selected = "All"),
selectInput("users", "Users", choices = c("All", unique(split_values(metadata$Users, unlist=T))), selected = "All"),
selectInput("pii", "PII", choices = c("All", unique(split_values(metadata$PII, unlist=T))), selected = "All"),
selectInput("source", "Source", choices = c("All", unique(split_values(metadata$Source, unlist=T))), selected = "All"),
selectInput("ftype", "File Type", choices = c("All", unique(tolower(gsub("^.+\\.(.+)$", "\\1", metadata$Connection)))), selected = "All")
),
mainPanel(
h3("Filters Applied:"),
uiOutput("filtersSummary"),
br(),
h3("Filtered File Paths:"),
uiOutput("fileList"),
br(),
h3("Selected File Data:"),
DTOutput("fileDataTable"),
uiOutput("docx_content"),
br()
)
)
)
# Define the server logic
metadata_server <- function(input, output, session) {
observe({
output$fileList <- renderUI({
file_paths <- filteredData()$Connection
tagList(
tags$div(
id = "fileListContent",
tags$ul(
class = "file-list-ul",
lapply(seq_along(file_paths), function(i) {
tags$li(
actionLink(
inputId = paste0("file_", i),
label = file_paths[i],
onclick = sprintf("Shiny.setInputValue('last_clicked', '%s', {priority: 'event'});",
paste0("file_", i))
)
)
})
)
),
tags$button(
id = "toggleBtn",
onclick = "toggleFileList()",
"▾"
)
)
})
}, priority = 1000)
# Original filtering logic remains the same
filteredData <- reactive({
data <- metadata
# Apply filters based on input values
if (!is.null(input$product_id) && input$product_id != "") {
data <- subset(data, grepl(input$product_id, Product_ID, ignore.case = TRUE))
}
if (!is.null(input$product_name) && input$product_name != "") {
data <- subset(data, grepl(input$product_name, Product_Name, ignore.case = TRUE))
}
if (input$keywords != "") {
data <- subset(data, grepl(input$keywords, Keywords, ignore.case = TRUE))
}
if (!is.null(input$location) && input$location != "All") {
data <- subset(data, sapply(split_values(Location), function(.) input$location %in% .))
}
if (!is.null(input$steward) && input$steward != "All") {
data <- subset(data, sapply(split_values(Steward), function(.) input$steward %in% .))
}
if (!is.null(input$users) && input$users != "All") {
data <- subset(data, sapply(split_values(Users), function(.) input$users %in% .))
}
if (!is.null(input$pii) && input$pii != "All") {
data <- subset(data, sapply(split_values(PII), function(.) input$pii %in% .))
}
if (!is.null(input$source) && input$source != "All") {
data <- subset(data, sapply(split_values(Source), function(.) input$source %in% .))
}
if (!is.null(input$ftype) && input$ftype != "All") {
data <- subset(data, grepl(paste0("\\.", input$ftype), Connection, ignore.case=TRUE))
}
return(data)
})
# Modified filters summary with enhanced styling
output$filtersSummary <- renderUI({
filters <- list()
if (!is.null(input$product_id) && input$product_id != "") {
filters$product_id <- list(label = "Product ID", value = input$product_id)
}
if (!is.null(input$product_name) && input$product_name != "") {
filters$product_name <- list(label = "Product Name", value = input$product_name)
}
if (!is.null(input$keywords) && input$keywords != "") {
filters$keywords <- list(label = "Keywords", value = input$keywords)
}
if (!is.null(input$location) && input$location != "All") {
filters$location <- list(label = "Location", value = input$location)
}
if (!is.null(input$steward) && input$steward != "All") {
filters$steward <- list(label = "Steward", value = input$steward)
}
if (!is.null(input$users) && input$users != "All") {
filters$users <- list(label = "Users", value = input$users)
}
if (!is.null(input$pii) && input$pii != "All") {
filters$pii <- list(label = "PII", value = input$pii)
}
if (!is.null(input$source) && input$source != "All") {
filters$source <- list(label = "Source", value = input$source)
}
if (!is.null(input$ftype) && input$ftype != "All") {
filters$ftype <- list(label = "File Type", value = input$ftype)
}
if (length(filters) == 0) {
return(p("No filters applied."))
} else {
# Create interactive filter tags with enhanced styling
tags$div(
lapply(names(filters), function(filter_name) {
tags$div(
class = "filter-tag",
onclick = sprintf("Shiny.setInputValue('remove_filter', '%s', {priority: 'event'});", filter_name),
tags$span(
class = "filter-label",
filters[[filter_name]]$label
),
tags$span(
class = "filter-value",
filters[[filter_name]]$value
),
tags$span(
class = "remove-icon",
HTML("×")
)
)
})
)
}
})
# Observer to handle filter removal
observeEvent(input$remove_filter, {
filter_name <- input$remove_filter
if (filter_name %in% c("product_id", "product_name", "keywords")) {
updateTextInput(session, filter_name, value = "")
} else {
updateSelectInput(session, filter_name, selected = "All")
}
})
# Reactive value to store the selected file path
selectedFile <- reactiveVal(NULL)
# Replace the original observer with this new one that responds to last_clicked
observeEvent(input$last_clicked, {
file_index <- as.numeric(gsub("file_", "", input$last_clicked))
file_paths <- filteredData()$Connection
if (file_index <= length(file_paths)) {
selectedFile(file_paths[file_index])
}
}, ignoreInit = TRUE)
# Render the data from the selected file
output$fileDataTable <- renderDT({
req(selectedFile()) # Ensure there is a selected file
if (grepl('\\.csv$', selectedFile())) {
tryCatch({
file_url <- paste0('https://raw.githubusercontent.com/NW-PaGe/dstt_catalog_demo/refs/heads/main/', selectedFile())
file_data <- read.csv(url(file_url))
# Convert dataframe to datatable object
datatable(file_data, options = list(pageLength = 10, autoWidth = TRUE))
}, error = function(e) {
datatable(data.frame(Error = paste("Unable to read file:", e$message)),
options = list(pageLength = 1, dom = 't'))
})
} else if (grepl('\\.parquet$', selectedFile())) {
tryCatch({
# get file location url
file_url <- paste0('https://raw.githubusercontent.com/NW-PaGe/dstt_catalog_demo/refs/heads/main/', selectedFile())
# check file size before downloading:
api_url <- paste0("https://api.github.com/repos/NW-PaGe/dstt_catalog_demo/contents/", selectedFile()) # Construct the GitHub API URL for the file
con <- url(api_url, "rb") # open connection
response <- readLines(con) # get response
close(con) # close connection
json_data <- jsonlite::fromJSON(paste(response, collapse = '')) # parse json response
file_size <- as.numeric(json_data$size)/1024 # get size in KB
# download parquet if small enough file
if (file_size < 100000) { # check if file is under 100 KB before attempting download
# Need to download temp file since httpfs extension not usable with Shinylive:
temp_file <- tempfile(fileext = ".parquet") # create temp parquet file
download.file(file_url, temp_file, mode = "wb") # write data from file at url to temp file
# read parquet
con <- dbConnect(duckdb(), ':memory:') # create db in mem
file_data <- dbGetQuery(con, glue::glue("SELECT * FROM read_parquet('{temp_file}') LIMIT 10000")) # Read data and limit preview size to 10k rows
dbDisconnect(con, shutdown = TRUE) # close in mem db connection
# convert dataframe to datatable object
datatable(file_data, options = list(pageLength = 10, autoWidth = TRUE))
} else {
datatable(data.frame(Warning = paste("Unable to read file: File too large. File size (KB):", format(file_size, nsmall=2, big.mark=','))),
options = list(pageLength = 1, dom = 't'))
}
}, error = function(e) {
datatable(data.frame(Error = paste("Unable to read file:", e$message)),
options = list(pageLength = 1, dom = 't'))
})
} else {
datatable(data.frame())
}
})
# Render docx content
output$docx_content <- renderUI({
req(selectedFile()) # Ensure there is a selected file
if (grepl('\\.docx$', selectedFile())) {
tryCatch({
file_url <- paste0('https://raw.githubusercontent.com/NW-PaGe/dstt_catalog_demo/main/', selectedFile())
temp_file <- tempfile(fileext = ".docx")
download.file(file_url, destfile = temp_file, mode = "wb")
# Read the .docx file using officer
doc <- read_docx(temp_file)
# Convert docx to html element using code from src
docx_to_html(doc)
}, error = function(e) {
HTML(paste("Unable to read file:", e$message))
})
} else {
HTML('')
}
})
}
# # Run the application
# shinyApp(ui=metadata_ui, server=metadata_server)
# Create main ui
ui <- fluidPage(
tags$style(
HTML("
h1.title {
text-align: center;
font-weight: bold;
}
hr.div {
width: 100%;
border: 5px dotted #000000;
border-style: none none dotted;
}
")
),
tags$br(id='dag-br'),
h1(id='dag', class='title', 'Directed Acyclic Graph (DAG):'),
tags$br(),
tags$br(),
fluidRow(column(12, dag_ui)), # insert dag ui
tags$hr(class='div'),
tags$br(id='metadata-viewer-br'),
h1(id='metadata-viewer', class='title', 'Metadata Viewer:'),
tags$br(),
tags$br(),
fluidRow(column(12, metadata_ui)) # insert metadata ui
)
# create main app
server <- function(input, output, session) {
dag_server(input, output, session) # insert dag server logic
metadata_server(input, output, session) # insert metadata server logic
}
shinyApp(ui, server)