# Locate the example input files provided by the package.
input_files <- pkg.data$input_files

# Paths to the silva biom file, the silva tree file, and the Nephele metadata.
biom_file <- input_files$biom_files$silva
tree_file <- input_files$tree_files$silva
metadata_file <- input_files$metadata$two_groups

# Custom phyloseq parsing function for silva annotations.
parse_func <- parse_taxonomy_silva_128

# Build the phyloseq object from the input files and the parsing function.
phy_obj <- create_phyloseq(
  biom_file = biom_file,
  tree_file = tree_file,
  metadata_file = metadata_file,
  parse_func = parse_func
)

# Get the taxmap object in the raw format.
raw_metacoder <- as_MicrobiomeR_format(obj = phy_obj, format = "raw_format")
Metacoder is a highly useful package that comes with tons of features right out of the box. Functions such as metacoder::filter_ambiguous_taxa(), taxa::filter_taxa(), and taxa::filter_obs(), for instance, can almost always be used in your workflow.
# Drop the taxon named "Archaea" together with its subtaxa. The condition
# selects Archaea; invert = TRUE turns the selection into a removal.
metacoder_obj <- taxa::filter_taxa(
  obj = raw_metacoder,
  taxon_names == "Archaea",
  subtaxa = TRUE,
  invert = TRUE
)

# Ambiguous Annotation Filter - remove taxonomies with ambiguous names.
metacoder_obj <- metacoder::filter_ambiguous_taxa(
  metacoder_obj,
  subtaxa = TRUE
)
Three functions are provided by MicrobiomeR that do some basic filtering:
- sample_id_filter() for filtering samples.
- taxon_id_filter() for filtering by taxon_id, which includes intermediate taxa.
- otu_id_filter() for filtering by otu_id, which only includes leaf taxa.
These functions all take the same parameters, most notably a transformation function (.f_transform), a filtering function (.f_filter), and a conditional function (.f_condition).
The advanced filtering functions available with MicrobiomeR do several things. They wrap the basic filtering functions mentioned above, they wrap common metacoder and taxa functions, and they mimic the tools found in the phyloseq package. Below I’ve mentioned some of them and how they relate to the phyloseq package:
- agglomerate_taxmap() is equivalent to phyloseq::tax_glom().
- otu_proportion_filter() is seen in the first step in phyloseq’s preprocessing vignette.
- Prevalence Filtering functions filter observations by their prevalence across samples.
- otu_prevalence_filter() is seen in phyloseq’s prevalence filtering vignette.
- taxa_prevalence_filter() is seen in phyloseq’s taxonomic filtering vignette.
- cov_filter() is seen in the ninth step in phyloseq’s preprocessing vignette.
# Master Threshold Filter - Add the otu_proportions table and then filter OTUs based on min %
metacoder_obj <- otu_proportion_filter(
  obj = metacoder_obj,
  otu_percentage = 0.00001
)

# Taxon Prevalence Filter - add taxa_abundance and taxa_proportions and then
# filter OTUs that do not appear more than a certain number of times in a
# certain percentage of samples at the specified agglomerated rank. This is
# considered a supervised method, because it relies on intermediate taxonomies
# to filter the data.
# The default minimum abundance is 5 and the default sample percentage is 0.5.
# NOTE(review): this was previously glossed as "5%"; read as a proportion,
# 0.5 is 50% - confirm against the taxa_prevalence_filter() documentation.

# Phylum
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Phylum"
)

# Class
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Class",
  validated = TRUE
)

# Order
metacoder_obj <- taxa_prevalence_filter(
  obj = metacoder_obj,
  rank = "Order",
  validated = TRUE
)

# OTU Prevalence Filter - filter OTUs that do not appear more than a certain
# number of times in a certain percentage of samples. This is considered an
# unsupervised method, because it relies only on the leaf OTU ids to filter
# the data.
metacoder_obj <- otu_prevalence_filter(
  obj = metacoder_obj,
  validated = TRUE
)

# Coefficient of Variation Filter - filter OTUs based on the coefficient of
# variation.
metacoder_obj <- cov_filter(
  obj = metacoder_obj,
  coefficient_of_variation = 3,
  validated = TRUE
)
As mentioned previously, taxmap filtering can be done in any way that fits your needs with the taxa and metacoder packages. However, MicrobiomeR also provides some utility functions for filtering/manipulating your observation data by hand. Observation data can be accessed within the taxmap object. Make sure you don’t manipulate existing data inside of your taxmap object unless you’re absolutely sure you know what you’re doing.
# Pull the taxon-level and OTU-level abundance tables out of the taxmap object.
taxa_abund <- metacoder_obj$data$taxa_abundance
otu_abund <- metacoder_obj$data$otu_abundance

# Transpose using a single ID column (taxon_id). The former column headers
# end up in a new column named "samples".
taxa_abund %>%
  transposer(ids = "taxon_id", header_name = "samples")
#> # A tibble: 48 x 176
#> samples ac af ag ah ai aj ak al an ao
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 Sample… 112582 53903 4217 4237 68 46540 1794 11 1524 51
#> 2 Sample… 88194 56426 480 750 23 30145 17 7 38 0
#> 3 Sample… 74933 39952 557 7138 21 26932 73 5 49 0
#> 4 Sample… 80796 41912 629 3234 14 34738 189 12 0 0
#> 5 Sample… 116806 51677 1111 1207 13 62301 47 27 109 11
#> 6 Sample… 90122 33032 350 9 0 56274 0 11 0 0
#> 7 Sample… 129778 61153 2346 2882 58 62494 12 24 338 101
#> 8 Sample… 113377 45411 332 1369 3 65815 228 31 16 1
#> 9 Sample… 134396 52332 3202 1560 44 76700 98 43 143 28
#> 10 Sample… 112240 41003 4624 518 54 65317 276 13 300 63
#> # … with 38 more rows, and 165 more variables: aq <dbl>, as <dbl>,
#> # at <dbl>, av <dbl>, aw <dbl>, ay <dbl>, az <dbl>, ba <dbl>, bb <dbl>,
#> # bc <dbl>, bd <dbl>, be <dbl>, bf <dbl>, bg <dbl>, bi <dbl>, bk <dbl>,
#> # bl <dbl>, bm <dbl>, bo <dbl>, br <dbl>, bs <dbl>, bu <dbl>, bv <dbl>,
#> # bx <dbl>, by <dbl>, bz <dbl>, cb <dbl>, cc <dbl>, ce <dbl>, cf <dbl>,
#> # ch <dbl>, ci <dbl>, cj <dbl>, cl <dbl>, co <dbl>, cq <dbl>, cr <dbl>,
#> # cs <dbl>, cu <dbl>, cv <dbl>, cz <dbl>, da <dbl>, dc <dbl>, dd <dbl>,
#> # de <dbl>, df <dbl>, dh <dbl>, dj <dbl>, dl <dbl>, dn <dbl>, do <dbl>,
#> # dq <dbl>, dr <dbl>, dt <dbl>, dx <dbl>, dy <dbl>, dz <dbl>, ea <dbl>,
#> # ec <dbl>, ed <dbl>, ee <dbl>, ef <dbl>, ei <dbl>, ej <dbl>, eo <dbl>,
#> # ep <dbl>, eq <dbl>, es <dbl>, et <dbl>, eu <dbl>, ex <dbl>, ey <dbl>,
#> # fa <dbl>, fe <dbl>, ff <dbl>, fi <dbl>, fj <dbl>, fl <dbl>, fm <dbl>,
#> # fn <dbl>, fp <dbl>, fq <dbl>, fr <dbl>, fs <dbl>, ft <dbl>, fu <dbl>,
#> # fv <dbl>, fx <dbl>, fy <dbl>, fz <dbl>, ga <dbl>, gb <dbl>, gd <dbl>,
#> # ge <dbl>, gf <dbl>, gg <dbl>, gh <dbl>, gj <dbl>, gk <dbl>, gl <dbl>,
#> # …
# Re-transpose with one ID (taxon_id): transposing a second time, now on the
# "samples" column, gives back a table with one row per taxon_id.
taxa_abund %>%
  transposer(ids = "taxon_id", header_name = "samples") %>%
  transposer(ids = "samples", header_name = "taxon_id")
#> # A tibble: 175 x 49
#> taxon_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13 Sample_14
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ac 112582 88194 74933 80796 116806 90122
#> 2 af 53903 56426 39952 41912 51677 33032
#> 3 ag 4217 480 557 629 1111 350
#> 4 ah 4237 750 7138 3234 1207 9
#> 5 ai 68 23 21 14 13 0
#> 6 aj 46540 30145 26932 34738 62301 56274
#> 7 ak 1794 17 73 189 47 0
#> 8 al 11 7 5 12 27 11
#> 9 an 1524 38 49 0 109 0
#> 10 ao 51 0 0 0 11 0
#> # … with 165 more rows, and 42 more variables: Sample_15 <dbl>,
#> # Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>, Sample_19 <dbl>,
#> # Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> # Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>,
#> # Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>, Sample_3 <dbl>,
#> # Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>, Sample_36 <dbl>,
#> # Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_45 <dbl>,
#> # Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>, Sample_50 <dbl>,
#> # Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> # Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>, Sample_6 <dbl>,
#> # Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>, Sample_8 <dbl>,
#> # Sample_9 <dbl>
# Transpose a table that carries two ID columns (taxon_id, otu_id).
# The new column headers are a combination of the categorical data; they are
# parsed and split back into individual columns if the table is re-transposed.
otu_abund %>%
  transposer(ids = "otu_id", header_name = "samples")
#> # A tibble: 48 x 1,667
#> samples `ac<_>DQ797054.… `af<_>EU510171.… `af<_>New.Clean…
#> <chr> <dbl> <dbl> <dbl>
#> 1 Sample… 130 0 108
#> 2 Sample… 0 4 106
#> 3 Sample… 0 14 152
#> 4 Sample… 0 57 71
#> 5 Sample… 119 3 34
#> 6 Sample… 443 2 1
#> 7 Sample… 15 0 74
#> 8 Sample… 49 14 49
#> 9 Sample… 0 1 12
#> 10 Sample… 33 1 55
#> # … with 38 more rows, and 1,663 more variables:
#> # `af<_>New.CleanUp.ReferenceOTU1675` <dbl>,
#> # `af<_>New.CleanUp.ReferenceOTU1778` <dbl>,
#> # `af<_>New.CleanUp.ReferenceOTU2195` <dbl>,
#> # `af<_>New.CleanUp.ReferenceOTU6283` <dbl>,
#> # `af<_>New.ReferenceOTU1018` <dbl>, `af<_>New.ReferenceOTU1460` <dbl>,
#> # `af<_>New.ReferenceOTU1898` <dbl>, `af<_>New.ReferenceOTU662` <dbl>,
#> # `aq<_>EU462203.1.1270` <dbl>, `aq<_>FJ881207.1.1492` <dbl>,
#> # `bd<_>GQ451183.1.1507` <dbl>, `bd<_>New.CleanUp.ReferenceOTU1` <dbl>,
#> # `bd<_>New.ReferenceOTU307` <dbl>, `bv<_>AF371685.1.1445` <dbl>,
#> # `bv<_>EU462407.1.1376` <dbl>, `bv<_>EU469250.1.1356` <dbl>,
#> # `bv<_>EU777569.1.1267` <dbl>, `bv<_>GQ448792.1.1389` <dbl>,
#> # `bv<_>HQ681868.1.1479` <dbl>, `bv<_>New.CleanUp.ReferenceOTU21` <dbl>,
#> # `bv<_>New.CleanUp.ReferenceOTU82` <dbl>,
#> # `bv<_>New.ReferenceOTU648` <dbl>, `cf<_>AF371949.1.1454` <dbl>,
#> # `cf<_>New.CleanUp.ReferenceOTU19053` <dbl>,
#> # `ch<_>New.CleanUp.ReferenceOTU579` <dbl>,
#> # `cq<_>AB506276.1.1515` <dbl>, `cq<_>EU467330.1.1365` <dbl>,
#> # `cq<_>JQ185323.1.1351` <dbl>, `cq<_>JQ187807.1.1343` <dbl>,
#> # `cq<_>KC163104.1.1515` <dbl>, `cq<_>New.CleanUp.ReferenceOTU45` <dbl>,
#> # `dc<_>FJ753766.1.1449` <dbl>, `dc<_>HQ716634.1.1482` <dbl>,
#> # `dc<_>JQ191107.1.1383` <dbl>,
#> # `dc<_>New.CleanUp.ReferenceOTU5376` <dbl>,
#> # `de<_>AB506217.1.1514` <dbl>, `de<_>DQ794395.1.1386` <dbl>,
#> # `de<_>DQ801522.1.1389` <dbl>, `de<_>DQ804804.1.1390` <dbl>,
#> # `de<_>DQ806210.1.1390` <dbl>, `de<_>DQ806419.1.1375` <dbl>,
#> # `de<_>DQ810016.1.1387` <dbl>, `de<_>EF399608.1.1495` <dbl>,
#> # `de<_>EF640147.1.1517` <dbl>, `de<_>EF640148.1.1608` <dbl>,
#> # `de<_>EU462246.1.1393` <dbl>, `de<_>EU462618.1.1379` <dbl>,
#> # `de<_>EU472017.1.1394` <dbl>, `de<_>EU472558.1.1391` <dbl>,
#> # `de<_>EU761737.1.1355` <dbl>, `de<_>EU762031.1.1361` <dbl>,
#> # `de<_>EU768267.1.1370` <dbl>, `de<_>EU774369.1.1254` <dbl>,
#> # `de<_>FJ367054.1.1360` <dbl>, `de<_>FJ368433.1.1378` <dbl>,
#> # `de<_>FJ372296.1.1359` <dbl>, `de<_>FJ678233.1.1439` <dbl>,
#> # `de<_>FJ678914.1.1380` <dbl>, `de<_>FJ681481.1.1448` <dbl>,
#> # `de<_>FJ681993.1.1389` <dbl>, `de<_>FJ681994.1.1392` <dbl>,
#> # `de<_>FJ684811.1.1391` <dbl>, `de<_>FJ880611.1.1492` <dbl>,
#> # `de<_>GQ135595.1.1382` <dbl>, `de<_>HQ716148.1.1427` <dbl>,
#> # `de<_>HQ716567.1.1448` <dbl>, `de<_>HQ775630.1.1451` <dbl>,
#> # `de<_>HQ780641.1.1454` <dbl>, `de<_>HQ781391.1.1449` <dbl>,
#> # `de<_>HQ785245.1.1449` <dbl>, `de<_>HQ790863.1.1455` <dbl>,
#> # `de<_>HQ792930.1.1435` <dbl>, `de<_>HQ808140.1.1436` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU1867` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU2137` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU233` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU292` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU3186` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU4248` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU446` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU451` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU509` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU574` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU66` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU7980` <dbl>,
#> # `de<_>New.CleanUp.ReferenceOTU92` <dbl>,
#> # `de<_>New.ReferenceOTU477` <dbl>, `df<_>AB506215.1.1522` <dbl>,
#> # `df<_>DQ796118.1.1378` <dbl>, `df<_>DQ798544.1.1272` <dbl>,
#> # `df<_>DQ809209.1.1280` <dbl>, `df<_>EF401803.1.1493` <dbl>,
#> # `df<_>EU461642.1.1389` <dbl>, `df<_>EU462752.1.1365` <dbl>,
#> # `df<_>EU771433.1.1280` <dbl>, `df<_>EU772441.1.1390` <dbl>,
#> # `df<_>EU776836.1.1286` <dbl>, `df<_>EU777365.1.1388` <dbl>,
#> # `df<_>EU777419.1.1375` <dbl>, `df<_>EU778354.1.1387` <dbl>, …
# Re-transpose with two IDs (taxon_id, otu_id).
# Transposing categorical data loses the names of the categorical columns, so
# they have to be supplied again (separated_categories) when re-transposing.
otu_abund %>%
  transposer(ids = "otu_id", header_name = "samples") %>%
  transposer(
    ids = "samples",
    header_name = "otu_id",
    separated_categories = c("taxon_id", "otu_id")
  )
#> # A tibble: 1,666 x 50
#> taxon_id otu_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ac DQ797… 130 0 0 0 119
#> 2 af EU510… 0 4 14 57 3
#> 3 af New.C… 108 106 152 71 34
#> 4 af New.C… 26 9 52 5 5
#> 5 af New.C… 0 0 15 20 10
#> 6 af New.C… 1 0 12 42 2
#> 7 af New.C… 45 19 17 22 0
#> 8 af New.R… 84 23 6 6 4
#> 9 af New.R… 1 2 25 58 0
#> 10 af New.R… 30 23 80 30 4
#> # … with 1,656 more rows, and 43 more variables: Sample_14 <dbl>,
#> # Sample_15 <dbl>, Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>,
#> # Sample_19 <dbl>, Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>,
#> # Sample_22 <dbl>, Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>,
#> # Sample_26 <dbl>, Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>,
#> # Sample_3 <dbl>, Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>,
#> # Sample_36 <dbl>, Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>,
#> # Sample_45 <dbl>, Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>,
#> # Sample_50 <dbl>, Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>,
#> # Sample_56 <dbl>, Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>,
#> # Sample_6 <dbl>, Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>,
#> # Sample_8 <dbl>, Sample_9 <dbl>
# Transform to proportions/percentages by COLUMN: the formula divides each
# value by the sum of the values it is applied to.
taxa_abund %>%
  transformer(func = ~ . / sum(.))
#> # A tibble: 175 x 49
#> taxon_id Sample_1 Sample_2 Sample_3 Sample_6 Sample_9 Sample_10
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ac 1.73e-1 1.75e-1 1.71e-1 1.69e-1 1.71e-1 0.171
#> 2 af 8.27e-2 6.51e-2 8.16e-2 6.78e-2 6.80e-2 0.109
#> 3 ag 6.47e-3 2.31e-2 7.75e-4 5.58e-3 1.21e-3 0.000931
#> 4 ah 6.50e-3 1.44e-3 9.05e-3 9.40e-4 2.18e-3 0.00146
#> 5 ai 1.04e-4 2.70e-5 1.45e-4 1.46e-6 2.59e-5 0.0000446
#> 6 aj 7.14e-2 8.46e-2 7.64e-2 9.29e-2 9.87e-2 0.0585
#> 7 ak 2.75e-3 1.47e-4 3.80e-4 9.21e-5 7.60e-5 0.0000330
#> 8 al 1.69e-5 8.29e-5 5.47e-5 1.46e-6 3.71e-6 0.0000136
#> 9 an 2.34e-3 2.28e-4 2.22e-4 7.31e-6 5.74e-5 0.0000737
#> 10 ao 7.83e-5 1.66e-5 9.02e-5 6.14e-4 2.72e-4 0
#> # … with 165 more rows, and 42 more variables: Sample_13 <dbl>,
#> # Sample_14 <dbl>, Sample_17 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> # Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>, Sample_30 <dbl>,
#> # Sample_50 <dbl>, Sample_59 <dbl>, Sample_60 <dbl>, Sample_61 <dbl>,
#> # Sample_7 <dbl>, Sample_27 <dbl>, Sample_45 <dbl>, Sample_5 <dbl>,
#> # Sample_57 <dbl>, Sample_20 <dbl>, Sample_29 <dbl>, Sample_11 <dbl>,
#> # Sample_12 <dbl>, Sample_15 <dbl>, Sample_16 <dbl>, Sample_18 <dbl>,
#> # Sample_19 <dbl>, Sample_23 <dbl>, Sample_28 <dbl>, Sample_31 <dbl>,
#> # Sample_35 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_46 <dbl>,
#> # Sample_47 <dbl>, Sample_51 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> # Sample_58 <dbl>, Sample_8 <dbl>, Sample_4 <dbl>, Sample_52 <dbl>,
#> # Sample_36 <dbl>
# Transforming by ROW is also possible.
# The function transposes and re-transposes the table to do this, so the
# transposing arguments (ids, header_name, separated_categories) are needed too.
taxa_abund %>%
  transformer(
    by = "row",
    func = ~ . / sum(.),
    ids = "taxon_id",
    header_name = "samples",
    separated_categories = "taxon_id"
  )
#> # A tibble: 175 x 49
#> taxon_id Sample_1 Sample_10 Sample_11 Sample_12 Sample_13 Sample_14
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ac 0.0241 0.0189 0.0160 0.0173 0.0250 0.0193
#> 2 af 0.0279 0.0292 0.0206 0.0217 0.0267 0.0171
#> 3 ag 0.0325 0.00370 0.00430 0.00485 0.00857 0.00270
#> 4 ah 0.0326 0.00577 0.0549 0.0249 0.00929 0.0000692
#> 5 ai 0.00688 0.00233 0.00213 0.00142 0.00132 0
#> 6 aj 0.0191 0.0124 0.0111 0.0143 0.0256 0.0231
#> 7 ak 0.232 0.00220 0.00945 0.0245 0.00608 0
#> 8 al 0.00441 0.00281 0.00201 0.00482 0.0108 0.00441
#> 9 an 0.285 0.00711 0.00917 0 0.0204 0
#> 10 ao 0.00928 0 0 0 0.00200 0
#> # … with 165 more rows, and 42 more variables: Sample_15 <dbl>,
#> # Sample_16 <dbl>, Sample_17 <dbl>, Sample_18 <dbl>, Sample_19 <dbl>,
#> # Sample_2 <dbl>, Sample_20 <dbl>, Sample_21 <dbl>, Sample_22 <dbl>,
#> # Sample_23 <dbl>, Sample_24 <dbl>, Sample_25 <dbl>, Sample_26 <dbl>,
#> # Sample_27 <dbl>, Sample_28 <dbl>, Sample_29 <dbl>, Sample_3 <dbl>,
#> # Sample_30 <dbl>, Sample_31 <dbl>, Sample_35 <dbl>, Sample_36 <dbl>,
#> # Sample_4 <dbl>, Sample_40 <dbl>, Sample_41 <dbl>, Sample_45 <dbl>,
#> # Sample_46 <dbl>, Sample_47 <dbl>, Sample_5 <dbl>, Sample_50 <dbl>,
#> # Sample_51 <dbl>, Sample_52 <dbl>, Sample_55 <dbl>, Sample_56 <dbl>,
#> # Sample_57 <dbl>, Sample_58 <dbl>, Sample_59 <dbl>, Sample_6 <dbl>,
#> # Sample_60 <dbl>, Sample_61 <dbl>, Sample_7 <dbl>, Sample_8 <dbl>,
#> # Sample_9 <dbl>
This function (presumably vlookup(), used below) was borrowed from an external source; the attribution link is missing from this text.
# Convert the taxmap object to the analyzed format.
metacoder_obj <- as_MicrobiomeR_format(
  obj = metacoder_obj,
  format = "analyzed_format"
)

# Create agglomerated taxmaps for phylum/class.
phylum_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Phylum")
class_mo <- agglomerate_taxmap(obj = metacoder_obj, rank = "Class")

# Get some observation data from each agglomerated taxmap.
phylum_data <- phylum_mo$data$stats_tax_data
class_data <- class_mo$data$stats_tax_data

# Take the Phylum values in "phylum_data" and cross reference them in
# "class_data". Return the "wilcox_p_value" of the class_data.
class_p_value <- vlookup(
  lookup_vector = phylum_data$Phylum,
  df = class_data,
  match_var = "Phylum",
  return_var = "wilcox_p_value"
)

# Add the looked-up values to phylum_data as a new column, then show the
# class-level p-value next to the phylum-level one.
new_data <- dplyr::mutate(phylum_data, class_p_value = class_p_value)
new_data[c("taxon_id", "Phylum", "Class", "class_p_value", "wilcox_p_value")]
#> # A tibble: 13 x 5
#> taxon_id Phylum Class class_p_value wilcox_p_value
#> <chr> <fct> <fct> <dbl> <dbl>
#> 1 ac <NA> <NA> NaN NaN
#> 2 af Firmicutes <NA> 3.40e-5 0.0000340
#> 3 ag Proteobacteria <NA> 3.69e-1 0.369
#> 4 ah Spirochaetae <NA> 6.65e-2 0.0665
#> 5 ai Verrucomicrobia <NA> 8.12e-3 0.00812
#> 6 aj Bacteroidetes <NA> 3.78e-4 0.000378
#> 7 ak Fibrobacteres <NA> 3.02e-2 0.0302
#> 8 al Actinobacteria <NA> 9.24e-8 0.0000000924
#> 9 an Cyanobacteria <NA> 6.69e-2 0.0669
#> 10 ao Elusimicrobia <NA> 3.94e-1 0.394
#> 11 aq Saccharibacteria <NA> NA 0.000895
#> 12 as Planctomycetes <NA> 1.48e-3 0.00148
#> 13 at Tenericutes <NA> 5.46e-1 0.546