#remotes::install_github("millacarmona/morphospace")
library(morphospace)
library(geomorph)
library(phytools)
library(Morpho)
library(tidyverse)
library(Rvcg)
library(rgl)
library(scatterplot3d)
6: Morphospace Plots
What is a morphospace and what is it good for?
Lets practice making nice morphospace plots using some landmark data. Morphospace plots are just a fancy way of saying “ordination plot.” With GMM data, we are typically talking about plotting principal component analyses (PCA).
Here is an example:
There is a lot of data packed into this plot! The most important this is seeing which data points (in this case, species, but it could be individuals) are similar and different from one another. The colors and shapes tell us about the phylogenetic affinities and whether the species is extinct. The images on the x and y axes show us which aspects of shape are most variable and how shape variation partitions specimens.
Lets try to recreate something like this!
#import data:
<-read.csv('./Data/crocs.csv',row.names = 1)
raw_data<- arrayspecs(raw_data, p=ncol(raw_data)/3, k=3)
shape_data #remove one specimen that has no ecology data:
<- which(dimnames(shape_data)[[3]]== "Crocodylus_biporcatus")
bad_specimen <- shape_data[,,-bad_specimen]
shape_data #phylogeny:
<- read.nexus('./Data/CrocTree2.nex')
phylo1 #ecological data
<- read_csv('./Data/croc_ecology_data.csv') eco_dat
Rows: 43 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (11): Clade2, Clade, Genus, Species, Taxon, Tip_label, Habitat, Diet, Di...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#make sure tips and data are in same order
<- shape_data[,,eco_dat$Tip_label] shape_data
Now do a Procrustes superimposition and carry out a principal components analysis of the aligned landmark coordinates.
<- gpagen(shape_data) gpa
Performing GPA
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
Making projections... Finished!
<- gm.prcomp(gpa$coords)
PCA #look at the morphospace
plot(PCA)
Pretty boring, right? and there really isn’t much we can learn from this on its own.
Some of the things we might want to explore/visualize in a morphospace:
how is shape variation distributed?
do species with similar ecology cluster together?
do species with similar body size have similar skull shapes?
The morphospace
package makes it very easy to do these sorts of data explorations
#define some good color-blind friendly colors:
#see https://davidmathlogic.com/colorblind/#%23D81B60-%231E88E5-%23FFC107-%23004D40
<-c("#D81B60","#1E88E5","#FFC107","#004D40","black")
cbb_colors<-as.factor(eco_dat$Habitat)
habitatsnames(habitats)<-eco_dat$Tip_label
<-as.factor(eco_dat$extinct)
fossilsnames(fossils)<-eco_dat$Tip_label
<-cbb_colors[habitats]
habitat_colorsmspace(gpa$coords)
Preparing for snapshot: rotate mean shape to the desired orientation
(don't close or minimize the rgl device).Press <Enter> in the console to continue:
This can take a few seconds...
DONE.
<-mspace(gpa$coords, plot=FALSE) %>%
msp# scatter points
proj_shapes(shapes = gpa$coords, col = habitat_colors) %>%
# convex hulls enclosing groups
proj_groups( groups = habitats, col=cbb_colors[1:3], alpha = 0.5) %>%
plot_mspace(legend = TRUE, cex.legend = .7)
I Still think those clusters of landmarks are hard to interpret- lets use warps of a mesh of a specimen
#load in a mesh
<-vcgImport("./Data/Alligator_mississippiensis.ply")
m1shade3d(m1, col="#E4D1C0", specular="black")
#make a "hypothetical shape",
#the mean of all specimens,
#by warping the skull of the alligator to the landmark configuration of the mean specimen
<- warpRefMesh(mesh=m1,
mean_skull mesh.coord = shape_data[,,"Alligator_mississippiensis"],
$consensus) gpa
and now plot with 3D models representing shape change:
mspace(gpa$coords, template= mean_skull,bg.model = "gray",
cex.ldm = 0, adj_frame = c(0.9, 0.85)) %>%
# scatter points
proj_shapes(shapes = gpa$coords, col = habitat_colors)
Preparing for snapshot: rotate mean shape to the desired orientation
(don't close or minimize the rgl device).Press <Enter> in the console to continue:
This can take a few seconds...
DONE.
Phylomorphospace
Another useful way of using morphospace is to project the phylogenetic tree into the ordination so that we can make hypotheses about how evolutionary relationships relate to phenotypic similarities or differences. For example, this will help us see if there could be convergent evolution if we see two species with very similar shapes (positions in morphospace) that have ancestors in completely different parts of morphospace.
<-eco_dat[match(phylo1$tip.label, eco_dat$Tip_label),]
eco_dat_phylo<-as.factor(eco_dat_phylo$Habitat)
habitats_phylonames(habitats_phylo)<-eco_dat_phylo$Tip_label
<-cbb_colors[habitats_phylo]
tip_colors
<-mspace(gpa$coords, models=FALSE) %>%
msp# scatter points
proj_shapes(shapes = gpa$coords, col = habitat_colors) %>%
# phylogenetic relationships
proj_phylogeny(shapes = gpa$coords, tree = phylo1, col.tips=tip_colors)
Preparing for snapshot: rotate mean shape to the desired orientation
(don't close or minimize the rgl device).Press <Enter> in the console to continue:
This can take a few seconds...
DONE.
Do we notice any convergence?
See if you can add tip labels to the plot so you can see which taxa are (potentially) converging!
Other Ways of Plotting Morphospace
Let’s try a different type of morphospace plot that is particularly relevant to us as paleontologists.
As paleontologists, we might be interested in how morphological variation expands and contracts through time.
First, lets prep by:
calculating the % variance explained by each PC axis so we can add that to the axis labels
We need a way to tell R how far apart to place the grids that represent time periods. We have a vector,
eco_dat$time
, that lists the time bin in which each species lived. We need to change these to equally spaced numerical bins that will become the z-axis values for the plot.
<-invisible(summary(PCA)) PCA_summary
Ordination type: Principal Component Analysis
Centering by OLS mean
Orthogonal projection of OLS residuals
Number of observations: 43
Number of vectors 42
Importance of Components:
Comp1 Comp2 Comp3 Comp4
Eigenvalues 0.01863466 0.004560141 0.002363625 0.001798776
Proportion of Variance 0.48157394 0.117847348 0.061082951 0.046485627
Cumulative Proportion 0.48157394 0.599421285 0.660504236 0.706989863
Comp5 Comp6 Comp7 Comp8
Eigenvalues 0.001503002 0.00105661 0.0008890967 0.0007642375
Proportion of Variance 0.038841957 0.02730589 0.0229768510 0.0197501259
Cumulative Proportion 0.745831819 0.77313771 0.7961145639 0.8158646898
Comp9 Comp10 Comp11 Comp12
Eigenvalues 0.000690656 0.0006627848 0.0005826994 0.0005479097
Proportion of Variance 0.017848564 0.0171282911 0.0150586522 0.0141595852
Cumulative Proportion 0.833713254 0.8508415449 0.8659001971 0.8800597823
Comp13 Comp14 Comp15 Comp16
Eigenvalues 0.0004476335 0.0004215015 0.0003661049 0.0003399192
Proportion of Variance 0.0115681536 0.0108928264 0.0094612179 0.0087845023
Cumulative Proportion 0.8916279358 0.9025207622 0.9119819801 0.9207664824
Comp17 Comp18 Comp19 Comp20
Eigenvalues 0.0002878028 0.0002708908 0.000245226 0.0002297029
Proportion of Variance 0.0074376629 0.0070006089 0.006337354 0.0059361919
Cumulative Proportion 0.9282041453 0.9352047542 0.941542108 0.9474783002
Comp21 Comp22 Comp23 Comp24
Eigenvalues 0.000220447 0.0001809923 0.0001791846 0.0001552528
Proportion of Variance 0.005696993 0.0046773698 0.0046306526 0.0040121846
Cumulative Proportion 0.953175293 0.9578526632 0.9624833158 0.9664955004
Comp25 Comp26 Comp27 Comp28
Eigenvalues 0.0001454897 0.0001343652 0.0001206231 0.0001176935
Proportion of Variance 0.0037598775 0.0034723885 0.0031172529 0.0030415433
Cumulative Proportion 0.9702553779 0.9737277664 0.9768450193 0.9798865626
Comp29 Comp30 Comp31 Comp32
Eigenvalues 0.0001023449 8.575332e-05 8.469609e-05 7.284477e-05
Proportion of Variance 0.0026448907 2.216116e-03 2.188794e-03 1.882521e-03
Cumulative Proportion 0.9825314533 9.847476e-01 9.869364e-01 9.888189e-01
Comp33 Comp34 Comp35 Comp36
Eigenvalues 0.0000659296 6.345064e-05 5.404416e-05 0.0000490079
Proportion of Variance 0.0017038131 1.639750e-03 1.396659e-03 0.0012665069
Cumulative Proportion 0.9905226975 9.921624e-01 9.935591e-01 0.9948256126
Comp37 Comp38 Comp39 Comp40
Eigenvalues 4.711339e-05 4.190097e-05 3.529723e-05 2.894756e-05
Proportion of Variance 1.217547e-03 1.082843e-03 9.121833e-04 7.480893e-04
Cumulative Proportion 9.960432e-01 9.971260e-01 9.980382e-01 9.987863e-01
Comp41 Comp42
Eigenvalues 2.814711e-05 1.881835e-05
Proportion of Variance 7.274033e-04 4.863210e-04
Cumulative Proportion 9.995137e-01 1.000000e+00
<-as.factor(eco_dat$time)
time_periodlevels(time_period)<-c(40,20,0,60)
<-as.numeric(as.character(time_period)) time_period2
Now we can build the plot
#stacked morphospace
<-scatterplot3d(x = PCA$x[,1], #PC axis 1 scores
plot3d1y = PCA$x[,2], #PC axis 2 scores
z = time_period2, #time bins
pch =19, #shape of the points
angle = 10,
color=habitat_colors, #color the points
z.ticklabs=c("Jurassic",
"",
"Cretaceous",
"",
"Cenozoic",
"",
"Modern"),
xlab=paste0("PC Axis 1 (",
signif(PCA_summary$PC.summary[2,1],3)*100,
"% of total variance)"),
ylab=paste0("PC Axis 2 (",
signif(PCA_summary$PC.summary[2,2],3)*100,
"% of total variance)"),
label.tick.marks = TRUE,
zlab="Geologic Time Period",
#additional graphics options for a clean plot
box=F,x.ticklabs=NA,y.ticklabs=NA)
$plane3d(20,0,0,lty="solid",col="grey")
plot3d1$plane3d(40,0,0,lty="solid",col="grey")
plot3d1$plane3d(60,0,0,lty="solid",col="grey")
plot3d1$points3d(x = PCA$x[,1],
plot3d1y = PCA$x[,2],
z = time_period2,
pch =19,
col=habitat_colors)
looks pretty good!
Try experimenting with the “angle” argument in
scatterplot3d
to see how you can change the appearance of the plot.What does this tell us about morphospace occupation through time and changes in morphological disparity through time?
What are some issues with this dataset that this plot reveals?