In this tutorial, we use the iTOL template structure and usage data to demonstrate the basic itol.toolkit workflow. The tree file was generated by weighted clustering based on the template parameters.
The following packages are used.
library(itol.toolkit) # main package
library(dplyr) # data manipulation
library(data.table) # file read
library(ape) # tree operation
library(stringr) # string operation
library(tidyr) # data manipulation
The tree file is built into the package and can be located with the
`system.file` function. Users can find the file at the returned
path and upload it to iTOL as the main tree.
The `template_groups` data contains the names and types of the 23 templates. We cluster the template types based on parameter similarity and function type.
The `template_parameters_count` data contains the template usage counts in public papers. We collected the data from GitHub or by request from the authors.
Here is an example of 9 annotation datasets. Run the code block to get the datasets.
# Path to the tree file bundled with itol.toolkit; it is used as the main
# tree for the hub and for every unit created later in this tutorial.
tree <- system.file("extdata", "tree_of_itol_templates.tree",
                    package = "itol.toolkit")

# Load the bundled template metadata and per-template parameter counts.
data("template_groups")
data("template_parameters_count")

# Hub object created for the main tree.
hub <- create_hub(tree = tree)
## 1,7 data
# One row per distinct template group; the group name is used both as the
# id and as the data value.
df_group <- data.frame(id = unique(template_groups$group),
                       data = unique(template_groups$group))
## 2 data
# Template metadata with the row sums of template_parameters_count appended
# as an extra column.
df_count <- cbind(template_groups,
                  as.data.frame(rowSums(template_parameters_count)))
## 3 data
# New display labels: underscores replaced by spaces, then title-cased.
df_rename <- data.frame(
  id = template_groups$template,
  new_label = str_to_title(str_replace_all(template_groups$template, "_", " "))
)
## 5 data
# Transpose the parameter counts and convert them to a connection table
# (the code below relies on it having `row`, `col` and `val` columns).
tab_tmp_01 <- as.data.frame(t(template_parameters_count))
tab_tmp_connect <- convert_01_to_connect(tab_tmp_01)

# Attach the template metadata to each connection via its `row` template.
tab_tmp_connect <- full_join(tab_tmp_connect, template_groups,
                             by = c("row" = "template"))

# Keep only connections with val > 10 and drop self-connections.
tab_tmp_connect <- tab_tmp_connect %>%
  filter(val > 10) %>%
  filter(row != col)
## 6 data
# Read the parameter table. The first two columns (referenced below as
# `parameter` and `group`) are set aside, the remaining columns are passed
# through convert_01(), and the two parts are bound back together.
tab_tmp <- fread(system.file("extdata", "parameter_groups.txt",
                             package = "itol.toolkit"))
tab_id_group <- tab_tmp[, c(1, 2)]
tab_tmp <- tab_tmp[, -c(1, 2)]
tab_tmp_01 <- convert_01(object = tab_tmp)
tab_tmp_01 <- cbind(tab_id_group, tab_tmp_01)

# Display order of the parameter groups.
order <- c("type", "separator", "profile", "field", "common themes",
           "specific themes", "data")

# Long format: one row per (parameter, group, variable) combination.
tab_tmp_01_long <- tab_tmp_01 %>%
  tidyr::gather(key = "variable", value = "value", c(-parameter, -group))

# Sum of `value` per group and variable, spread to one column per variable,
# with the rows sorted by the group order defined above.
template_start_group <- tab_tmp_01_long %>%
  group_by(group, variable) %>%
  summarise(sublen = sum(value)) %>%
  tidyr::spread(key = variable, value = sublen)
template_start_group$group <- factor(template_start_group$group,
                                     levels = order)
template_start_group <- template_start_group %>% arrange(group)

# Freq is the row-wise maximum across the variable columns; each group's
# start offset is the running total of Freq over all preceding groups.
start_group <- data.frame(
  Var1 = template_start_group$group,
  Freq = apply(template_start_group[, -1], 1, max)
)
# Exclusive cumulative sum; equivalent to looping i over 2:nrow() and
# summing Freq[1:(i - 1)], but also correct for a one-row table.
start_group$start <- c(0, head(cumsum(start_group$Freq), -1))

# Zero sums become NA so those rows are dropped by na.omit() below.
template_start_group[template_start_group == 0] <- NA

# End position = per-group sum shifted by the group's start offset.
template_end_group <-
  template_start_group[, 2:(ncol(template_start_group) - 1)] +
  start_group$start
template_end_group <- data.frame(group = order, template_end_group)

template_end_group_long <- template_end_group %>%
  tidyr::gather(key = "variable", value = "value", -group)
names(template_end_group_long)[3] <- "end"
# The start offsets repeat once per variable, matching the long layout.
template_end_group_long$start <- rep(
  start_group$start,
  length(unique(template_end_group_long$variable))
)
template_end_group_long <- template_end_group_long %>% na.omit()
template_end_group_long$length <- sum(start_group$Freq)
# Reorder columns to: variable, length, start, end, group.
template_end_group_long <- template_end_group_long[, c(2, 5, 4, 3, 1)]
template_end_group_long$group <- factor(template_end_group_long$group,
                                        levels = order)
## 8 data
# Template usage table; the first column is the template id and each
# remaining column is renamed to the publication it was taken from.
df_values <- fread(system.file("extdata", "templates_frequence.txt",
                               package = "itol.toolkit"))
names(df_values) <- c(
  "id",
  "Li,S. et al. (2022) J. Hazard. Mater.",
  "Zheng,L. et al. (2022) Environ. Pollut.",
  "Welter,D.K. et al. (2021) mSystems",
  "Zhang,L et al. (2022) Nat. Commun.",
  "Rubbens,P. et al. (2019) mSystems",
  "Laidoudi,Y. et al. (2022) Pathogens",
  "Wang,Y. et al. (2022) Nat. Commun.",
  "Ceres,K.M. et al. (2022) Microb. Genomics",
  "Youngblut,N.D. et al. (2019) Nat. Commun.",
  # Author name restored from a mis-encoded form ("BalvĂn").
  "Balvín,O. et al. (2018) Sci. Rep.",
  "Prostak,S.M. et al. (2021) Curr. Biol.",
  "Dijkhuizen,L.W. et al. (2021) Front. Plant Sci.",
  "Zhang,X. et al. (2022) Microbiol. Spectr.",
  "Peris,D. et al. (2022) PLOS Genet.",
  "Denamur,E. et al. (2022) PLOS Genet.",
  "Dezordi,F.Z. et al. (2022) bioRxiv",
  "Lin,Y. et al. (2021) Microbiome",
  "Wang,Y. et al. (2022) bioRxiv",
  "Qi,Z. et al. (2022) Food Control",
  "Zhou,X. et al. (2022) Food Res. Int.",
  "Zhou,X. et al. (2022) Nat. Commun."
)
# Strip parentheses and replace commas with hyphens in the column names.
names(df_values) <- str_remove_all(names(df_values), "[()]")
names(df_values) <- str_replace_all(names(df_values), ",", "-")
## 9 data
# Same usage table in long format: one (template, value) row per non-missing
# count, with the counts transformed by the natural logarithm.
df_value <- fread(system.file("extdata", "templates_frequence.txt",
                              package = "itol.toolkit"))
df_value <- df_value %>%
  tidyr::pivot_longer(-templates) %>%
  na.omit() %>%
  select(templates, value) %>%
  as.data.frame()
df_value$value <- log(df_value$value)
Use the default themes to create unit objects with the
`create_unit` function. Most template types need only two
columns of data. The other settings can be defined by parameters or
are identified automatically by the program.
The `data` argument takes the annotation dataset in data frame format. The `key` argument sets the output file name used when all units are merged into the hub and written out from the hub object. The `type` argument is the template name. The `tree` argument is the main tree path or a phylo object. The remaining parameters depend on the template type.
# Create one annotation unit per dataset. Every unit is built on the same
# main tree; `key` names the unit and `type` selects the iTOL template.

# Unit 1: clade colours for the template groups.
unit_1 <- create_unit(data = df_group,
                      key = "E1_template_types",
                      type = "TREE_COLORS",
                      subtype = "clade",
                      line_type = c(rep("normal", 4), "dashed"),
                      size_factor = 5,
                      tree = tree)

# Unit 2: symbols built from the per-template parameter totals.
unit_2 <- create_unit(data = df_count,
                      key = "E2_parameter_number",
                      type = "DATASET_SYMBOL",
                      position = 1,
                      tree = tree)

# Unit 3: replacement labels.
unit_3 <- create_unit(data = df_rename,
                      key = "E3_template_rename",
                      type = "LABELS",
                      tree = tree)

# Unit 4: label styling taken from the template metadata.
unit_4 <- create_unit(data = template_groups,
                      key = "E4_template_name_color",
                      type = "DATASET_STYLE",
                      subtype = "label",
                      position = "node",
                      size_factor = 1.5,
                      tree = tree)

# Unit 5: connections; only the first four columns are passed on.
unit_5 <- create_unit(data = tab_tmp_connect[, 1:4],
                      key = "E5_template_similarity",
                      type = "DATASET_CONNECTION",
                      tree = tree)

# Unit 6: domains describing the parameter structure of each template.
unit_6 <- create_unit(data = template_end_group_long,
                      key = "E6_template_parameters_structure",
                      type = "DATASET_DOMAINS",
                      tree = tree)

# Unit 7: colour strip; reuses the same data as unit 1.
unit_7 <- create_unit(data = df_group,
                      key = "E7_template_types",
                      type = "DATASET_COLORSTRIP",
                      tree = tree)

# Unit 8: heat map of usage counts per publication.
unit_8 <- create_unit(data = df_values,
                      key = "E8_usage_count_among_publications",
                      type = "DATASET_HEATMAP",
                      tree = tree)

# Unit 9: box plot of the log-transformed usage counts.
unit_9 <- create_unit(data = df_value,
                      key = "E9_log_transformed_usage_count",
                      type = "DATASET_BOXPLOT",
                      tree = tree)
In unit and hub objects, we can change the theme settings. The 23 templates have 114 kinds of theme parameters. We cluster the parameters based on their function and specification.
# Override theme settings by assigning into the units' specific_themes slot.

# Maximum plot size for the symbol and connection units.
unit_2@specific_themes$basic_plot$size_max <- 40
unit_5@specific_themes$basic_plot$size_max <- 100

# Heat map colour range (min and max colours); use_mid is set to 0.
unit_8@specific_themes$heatmap$color$min <- "#ffd966"
unit_8@specific_themes$heatmap$color$max <- "#cc0000"
unit_8@specific_themes$heatmap$use_mid <- 0

# Maximum plot size for the box plot unit.
unit_9@specific_themes$basic_plot$size_max <- 100
For multi-level annotation, we can merge the units into one hub. This
hub object can be saved locally, or its template files can be written
out with the `write_hub` function.
# Merge all nine units into the hub with `+`, then write the hub out to the
# current working directory.
hub <- hub +
  unit_1 +
  unit_2 +
  unit_3 +
  unit_4 +
  unit_5 +
  unit_6 +
  unit_7 +
  unit_8 +
  unit_9
write_hub(hub, getwd())