Get_Start

In this tutorial, we use the iTOL template structure and usage data to demonstrate the basic itol.toolkit workflow. The tree file was generated by weighted clustering based on the template parameters.

Data preparation

The tree file is a built-in file in the package, which can be located by the system.file function. Users can find the file in the path and upload it to iTOL as the main tree.

The template_groups data contains the names of the 23 templates and their types. We clustered the template types based on parameter similarity and function type.

The template_parameters_count data contains the template usage count in public papers. We collected the data from GitHub or by request from the authors.

Here is an example of 9 annotation datasets. Run the code block to get the datasets.

# Locate the tree file inside the installed package, load the two example
# datasets, and create the hub object around the tree.
tree <- system.file(
  "extdata", "tree_of_itol_templates.tree",
  package = "itol.toolkit"
)
data("template_groups", "template_parameters_count")
hub <- create_hub(tree = tree)

## 1,7 data
# One row per distinct template group; the group name serves both as the id
# and as the data column.
df_group <- data.frame(id = unique(template_groups$group))
df_group$data <- df_group$id

## 2 data
# Append the per-row totals of template_parameters_count to template_groups
# as an extra column. The two objects are combined by position, so their rows
# must be in the same order.
# NOTE(review): the appended column is presumably named after the expression
# passed to as.data.frame() -- confirm before referring to it by name.
df_count <- cbind(template_groups,as.data.frame(rowSums(template_parameters_count)))

## 3 data
# Build the new labels from the template ids: underscores become spaces,
# then the text is converted to title case.
df_rename <- data.frame(id = template_groups$template)
df_rename$new_label <- str_replace_all(template_groups$template, "_", " ")
df_rename$new_label <- str_to_title(df_rename$new_label)

## 5 data
# Transpose the count table, run it through convert_01_to_connect(), attach
# the template_groups columns by matching `row` to `template`, and keep only
# the rows with val above 10 whose `row` and `col` differ.
tab_tmp_01 <- template_parameters_count %>%
  t() %>%
  as.data.frame()
tab_tmp_connect <- tab_tmp_01 %>%
  convert_01_to_connect() %>%
  full_join(template_groups, by = c("row" = "template")) %>%
  filter(val > 10, row != col)

## 6 data
# Read the parameter table bundled with the package. Its first two columns
# are set aside as identifiers (they are referred to as `parameter` and
# `group` further down); the remaining columns go through convert_01().
tab_tmp <- fread(system.file("extdata", "parameter_groups.txt", package = "itol.toolkit"))
tab_id_group <- tab_tmp[, c(1, 2)]
tab_tmp <- tab_tmp[, -c(1, 2)]
tab_tmp_01 <- convert_01(object = tab_tmp)
tab_tmp_01 <- cbind(tab_id_group, tab_tmp_01)

# Order in which the parameter groups are laid out; also used as the factor
# levels of the `group` columns below.
order <- c("type", "separator", "profile", "field", "common themes", "specific themes", "data")

# Long format: one row per original row and gathered column.
tab_tmp_01_long <- tab_tmp_01 %>%
  tidyr::gather(key = "variable", value = "value", c(-parameter, -group))

# Sum the values per group and variable, then spread the variables back into
# columns so that each row describes one group.
template_start_group <- tab_tmp_01_long %>%
  group_by(group, variable) %>%
  summarise(sublen = sum(value)) %>%
  tidyr::spread(key = variable, value = sublen)
template_start_group$group <- factor(template_start_group$group, levels = order)
template_start_group <- template_start_group %>% arrange(group)

# Width reserved for each group: the largest per-variable total in its row.
start_group <- data.frame(
  Var1 = template_start_group$group,
  Freq = apply(template_start_group[, -1], 1, max)
)

# Each group starts where the preceding groups end: 0 for the first group,
# then the running total of the earlier Freq values. The vectorised form
# replaces a `for (i in 2:nrow(...))` loop, which misbehaves when there are
# fewer than two rows because 2:1 counts downwards.
start_group$start <- head(c(0, cumsum(start_group$Freq)), -1)

# Zero totals become NA so that na.omit() below removes them.
template_start_group[template_start_group == 0] <- NA

# Shift every total by the start offset of its group (the offset vector is
# recycled down the rows) to obtain the end positions.
# NOTE(review): the range 2:(ncol - 1) leaves out the last column of
# template_start_group -- confirm that this is intended.
template_end_group <- template_start_group[, 2:(ncol(template_start_group) - 1)] + start_group$start
template_end_group <- data.frame(group = order, template_end_group)
template_end_group_long <- template_end_group %>%
  tidyr::gather(key = "variable", value = "value", -group)
names(template_end_group_long)[3] <- "end"

# gather() stacks the columns one after another, so the per-group start
# offsets repeat once per variable. This must happen before na.omit(),
# while every variable still has one row per group.
template_end_group_long$start <- rep(
  start_group$start,
  length(unique(template_end_group_long$variable))
)
template_end_group_long <- template_end_group_long %>% na.omit()
template_end_group_long$length <- sum(start_group$Freq)

# Final column order: variable, length, start, end, group.
template_end_group_long <- template_end_group_long[, c("variable", "length", "start", "end", "group")]
template_end_group_long$group <- factor(template_end_group_long$group, levels = order)

## 8 data
# Read the frequency table and relabel its columns: the first column becomes
# "id", the others get a publication label. Parentheses are stripped from the
# labels and commas are replaced by hyphens.
df_values <- fread(
  system.file("extdata", "templates_frequence.txt", package = "itol.toolkit")
)
names(df_values) <- c(
  "id",
  "Li,S. et al. (2022) J. Hazard. Mater.",
  "Zheng,L. et al. (2022) Environ. Pollut.",
  "Welter,D.K. et al. (2021) mSystems",
  "Zhang,L et al. (2022) Nat. Commun.",
  "Rubbens,P. et al. (2019) mSystems",
  "Laidoudi,Y. et al. (2022) Pathogens",
  "Wang,Y. et al. (2022) Nat. Commun.",
  "Ceres,K.M. et al. (2022) Microb. Genomics",
  "Youngblut,N.D. et al. (2019) Nat. Commun.",
  "Balvín,O. et al. (2018) Sci. Rep.",
  "Prostak,S.M. et al. (2021) Curr. Biol.",
  "Dijkhuizen,L.W. et al. (2021) Front. Plant Sci.",
  "Zhang,X. et al. (2022) Microbiol. Spectr.",
  "Peris,D. et al. (2022) PLOS Genet.",
  "Denamur,E. et al. (2022) PLOS Genet.",
  "Dezordi,F.Z. et al. (2022) bioRxiv",
  "Lin,Y. et al. (2021) Microbiome",
  "Wang,Y. et al. (2022) bioRxiv",
  "Qi,Z. et al. (2022) Food Control",
  "Zhou,X. et al. (2022) Food Res. Int.",
  "Zhou,X. et al. (2022) Nat. Commun."
) %>%
  str_remove_all("[()]") %>%
  str_replace_all(",", "-")

## 9 data
# Read the frequency table again, reshape it to long format keyed by the
# `templates` column, drop the rows containing NA, keep only the template and
# value columns, and log-transform the values.
df_value <- system.file("extdata", "templates_frequence.txt", package = "itol.toolkit") %>%
  fread() %>%
  tidyr::pivot_longer(-templates) %>%
  na.omit() %>%
  select(templates, value) %>%
  as.data.frame()
df_value$value <- log(df_value$value)

Create unit

Use the create_unit function to create a unit object with the default themes. Most template types need only two columns of data. Other values can be set through parameters or are identified automatically by the program.

The data parameter takes the annotation dataset in data frame format. The key parameter sets the output file name used when all units are merged into the hub and written out through the hub object. The type parameter is the template name. The tree parameter is the path to the main tree or a phylo object. The remaining parameters depend on the template type.

# Unit 1: TREE_COLORS template (clade subtype) built from df_group.
# The line types are four "normal" entries followed by one "dashed".
unit_1 <- create_unit(
  data = df_group,
  key = "E1_template_types",
  type = "TREE_COLORS",
  subtype = "clade",
  line_type = rep(c("normal", "dashed"), times = c(4, 1)),
  size_factor = 5,
  tree = tree
)

# Unit 2: DATASET_SYMBOL template built from df_count, with position 1.
unit_2 <- create_unit(
  data = df_count,
  key = "E2_parameter_number",
  type = "DATASET_SYMBOL",
  position = 1,
  tree = tree
)

# Unit 3: LABELS template built from df_rename (id plus new label).
unit_3 <- create_unit(
  data = df_rename,
  key = "E3_template_rename",
  type = "LABELS",
  tree = tree
)

# Unit 4: DATASET_STYLE template (label subtype, node position) built
# directly from template_groups.
unit_4 <- create_unit(
  data = template_groups,
  key = "E4_template_name_color",
  type = "DATASET_STYLE",
  subtype = "label",
  position = "node",
  size_factor = 1.5,
  tree = tree
)

# Unit 5: DATASET_CONNECTION template built from the first four columns of
# tab_tmp_connect.
unit_5 <- create_unit(
  data = tab_tmp_connect[, 1:4],
  key = "E5_template_similarity",
  type = "DATASET_CONNECTION",
  tree = tree
)

# Unit 6: DATASET_DOMAINS template built from template_end_group_long.
unit_6 <- create_unit(
  data = template_end_group_long,
  key = "E6_template_parameters_structure",
  type = "DATASET_DOMAINS",
  tree = tree
)

# Unit 7: DATASET_COLORSTRIP template; reuses df_group, the same data as
# unit 1.
unit_7 <- create_unit(
  data = df_group,
  key = "E7_template_types",
  type = "DATASET_COLORSTRIP",
  tree = tree
)

# Unit 8: DATASET_HEATMAP template built from df_values (one column per
# publication label).
unit_8 <- create_unit(
  data = df_values,
  key = "E8_usage_count_among_publications",
  type = "DATASET_HEATMAP",
  tree = tree
)

# Unit 9: DATASET_BOXPLOT template built from df_value (log-transformed
# values in long format).
unit_9 <- create_unit(
  data = df_value,
  key = "E9_log_transformed_usage_count",
  type = "DATASET_BOXPLOT",
  tree = tree
)

Get_Start

Data preparation

Create unit

Theme setup

Merge