The main function `ggVennDiagram()` accepts a list as input and outputs a
ggplot object. By measuring the length of the input list, it automatically
applies internal functions to build a plot in two steps: data preparation and
visualization.
Data preparation is packaged into one function, `process_data()`. Its output
is an S4 `VennPlotData` class object, which contains three slots: `setEdge`,
`setLabel` and `region`. The data in these slots can then be plotted with
ggplot functions.
See below for a better understanding.
Generate example data.
# Example input: four named gene sets of different sizes, each drawn at random
# from a pool of 1000 gene identifiers. The fixed seed keeps the draws
# reproducible; the sets are sampled in the order A, B, C, D.
genes <- paste0("gene", seq_len(1000))
set.seed(20231214)
gene_list <- lapply(
  c(A = 100, B = 200, C = 300, D = 200),
  function(set_size) sample(genes, set_size)
)
Then we can reproduce the plot of `ggVennDiagram()` with just a few lines of
code.
# Build the Venn object from the list, then derive the plot data from it.
venn <- Venn(gene_list)
data <- process_data(venn)

# Stack four layers, each fed by its own component of the plot data.
ggplot() +
  # 1. regions, filled according to their count
  geom_polygon(
    mapping = aes(x = X, y = Y, fill = count, group = id),
    data = venn_regionedge(data)
  ) +
  # 2. set outlines, coloured by set id (legend suppressed)
  geom_path(
    mapping = aes(x = X, y = Y, color = id, group = id),
    data = venn_setedge(data),
    show.legend = FALSE
  ) +
  # 3. set names
  geom_text(
    mapping = aes(x = X, y = Y, label = name),
    data = venn_setlabel(data)
  ) +
  # 4. region counts
  geom_label(
    mapping = aes(x = X, y = Y, label = count),
    data = venn_regionlabel(data)
  ) +
  coord_equal() +
  theme_void()
The variable data
is a structured list that has three
slots.
# Printing the object shows the summary below.
data
#> Class VennPlotData - '401f'
#> Type: ellipse; No. sets: 4; No. regions: 15.
#> To view this shape, use `plot_shape_edge(get_shape_by_id('401f'))`.
#> To view its components, use `venn_setedge()`, `venn_setlabel()`, etc.
`ggVennDiagram` exports functions to get these data, and they can be used for
comprehensive customization on the user side.
- `Venn()`: Venn object constructor; use this to construct a Venn object from
  a list.
- `process_data()`: process data with a Venn object
- `venn_regionedge()`: get region edge data to plot
- `venn_regionlabel()`: get region label position to plot
- `venn_setedge()`: get setedge data to plot
- `venn_setlabel()`: get setlabel data to plot

For example, you may change edge/fill/label properties as you like.
ggplot() +
  # fill each region by its id instead of its count (legend suppressed)
  geom_polygon(
    mapping = aes(x = X, y = Y, fill = id, group = id),
    data = venn_regionedge(data),
    show.legend = FALSE
  ) +
  # draw thicker set outlines, coloured by set id
  geom_path(
    mapping = aes(x = X, y = Y, color = id, group = id),
    data = venn_setedge(data),
    show.legend = FALSE,
    linewidth = 3
  ) +
  # print the set names in bold
  geom_text(
    mapping = aes(x = X, y = Y, label = name),
    data = venn_setlabel(data),
    fontface = "bold"
  ) +
  # label each region with its id, using an alternative region name
  geom_label(
    mapping = aes(x = X, y = Y, label = id),
    data = venn_regionlabel(data),
    alpha = 0.5
  ) +
  coord_equal() +
  theme_void()
# Build four named sets, each holding 8 letters drawn at random and sorted for
# readability. The fixed seed makes the draws reproducible; the sets are
# sampled in the order A, B, C, D.
set.seed(20231225)
y <- list(
  A = sort(sample(letters, 8)),
  B = sort(sample(letters, 8)),
  C = sort(sample(letters, 8)),
  D = sort(sample(letters, 8))
)
# view the list
y
#> $A
#> [1] "a" "e" "g" "o" "p" "s" "t" "v"
#>
#> $B
#> [1] "a" "d" "f" "i" "k" "s" "y" "z"
#>
#> $C
#> [1] "b" "g" "k" "o" "r" "s" "u" "w"
#>
#> $D
#> [1] "b" "c" "e" "h" "k" "q" "s" "y"
To view subset items interactively, set `show_intersect = TRUE`.
# Draw the diagram with show_intersect = TRUE; this call emits the
# "Ignoring unknown aesthetics: text" warning shown below.
ggVennDiagram(y, show_intersect = TRUE, set_color = "black")
#> Warning in geom_text(aes(label = .data$count, text = .data$item), data =
#> region_label): Ignoring unknown aesthetics: text
# Construct a Venn object from the list and print its summary.
venn_y <- Venn(y)
venn_y
#> An object of class 'Venn':
#> Slots: sets, names;
#> No. Sets: 4 SetNames: A, B, C, D.
# find the overlapping members of two or more sets
overlap(venn_y, 1:2) # members in both the first two sets
#> [1] "a" "s"
overlap(venn_y) # members in all the sets
#> [1] "s"
# find the different members between sets and set unions
discern(venn_y, 1) # members in set 1 that are in none of the remaining sets (the default)
#> [1] "p" "t" "v"
discern(venn_y, c("A","B"), 3) # members in set A or B (their union), but not in the third set
#> [1] "a" "e" "p" "t" "v" "d" "f" "i" "y" "z"
# find the specific members in one or more sets
discern_overlap(venn_y, 1) # specific items in set 1, equal to `discern(venn_y, 1)`: members not shared with any other set
#> [1] "p" "t" "v"
discern_overlap(venn_y, 1:2) # items shared by set 1 and set 2 but found in no other set
#> [1] "a"
# Derive the plot data from the Venn object.
venn_plot_data <- process_data(venn_y)
# summary of VennPlotData object
venn_plot_data
#> Class VennPlotData - '401f'
#> Type: ellipse; No. sets: 4; No. regions: 15.
#> To view this shape, use `plot_shape_edge(get_shape_by_id('401f'))`.
#> To view its components, use `venn_setedge()`, `venn_setlabel()`, etc.
Sets and labels
# get the set data: one row per set, with its id, name, items and item count
venn_set(venn_plot_data)
#> # A tibble: 4 × 4
#> id name item count
#> <chr> <chr> <named list> <int>
#> 1 1 A <chr [8]> 8
#> 2 2 B <chr [8]> 8
#> 3 3 C <chr [8]> 8
#> 4 4 D <chr [8]> 8
# get subsets, i.e., regions: one row per region, with its id, name, items
# and item count
venn_region(venn_plot_data)
#> # A tibble: 15 × 4
#> id name item count
#> <chr> <chr> <list> <int>
#> 1 1 A <chr [3]> 3
#> 2 2 B <chr [4]> 4
#> 3 3 C <chr [3]> 3
#> 4 4 D <chr [3]> 3
#> 5 1/2 A/B <chr [1]> 1
#> 6 1/3 A/C <chr [2]> 2
#> 7 1/4 A/D <chr [1]> 1
#> 8 2/3 B/C <chr [0]> 0
#> 9 2/4 B/D <chr [1]> 1
#> 10 3/4 C/D <chr [1]> 1
#> 11 1/2/3 A/B/C <chr [0]> 0
#> 12 1/2/4 A/B/D <chr [0]> 0
#> 13 1/3/4 A/C/D <chr [0]> 0
#> 14 2/3/4 B/C/D <chr [1]> 1
#> 15 1/2/3/4 A/B/C/D <chr [1]> 1
Polygons.
# get set edge: X/Y coordinates of the set outlines, keyed by set id
venn_setedge(venn_plot_data)
#> # A tibble: 404 × 3
#> id X Y
#> <chr> <dbl> <dbl>
#> 1 1 0.103 0.717
#> 2 1 0.0941 0.708
#> 3 1 0.0867 0.698
#> 4 1 0.0804 0.687
#> 5 1 0.0751 0.675
#> 6 1 0.0709 0.662
#> 7 1 0.0678 0.648
#> 8 1 0.0659 0.634
#> 9 1 0.0650 0.619
#> 10 1 0.0653 0.603
#> # ℹ 394 more rows
# get region edge: X/Y coordinates of the region outlines, together with each
# region's name, items and count
venn_regionedge(venn_plot_data)
#> # A tibble: 713 × 6
#> id X Y name item count
#> <chr> <dbl> <dbl> <chr> <list> <int>
#> 1 1 0.103 0.717 A <chr [3]> 3
#> 2 1 0.112 0.726 A <chr [3]> 3
#> 3 1 0.122 0.733 A <chr [3]> 3
#> 4 1 0.133 0.740 A <chr [3]> 3
#> 5 1 0.145 0.745 A <chr [3]> 3
#> 6 1 0.158 0.749 A <chr [3]> 3
#> 7 1 0.172 0.752 A <chr [3]> 3
#> 8 1 0.186 0.754 A <chr [3]> 3
#> 9 1 0.201 0.755 A <chr [3]> 3
#> 10 1 0.217 0.755 A <chr [3]> 3
#> # ℹ 703 more rows
# Plot the raw set-edge coordinates with base graphics; asp = 1 keeps the
# x and y axes on the same scale.
df <- venn_setedge(venn_plot_data)
plot(df$X, df$Y, asp = 1)