Workshopping some Plots






Today’s R Club will focus on workshopping one straightforward data wrangling task, and plotting data from Dani’s and Arian’s Intentional Regulation of Craving study.

Our tasks:

  • Build on the graphs already made (e.g. add titles, change labels, change colors, etc.)
  • Use ImageCats.csv to create columns for the image category and image number (i.e. if choices are 1=chocolate, 2=donuts, then first01.jpg –> chocolate01, second07.jpg –> donuts07, etc.).
  • Use new image info to further explore the data

Check out past posts on ggplot (especially the ones at the bottom) to get some help on plotting. You also might want to read about the philosophy of ggplot. The diagram below may also help you get a sense for how ggplot works.

Load packages

library(dplyr)
library(tidyr)
library(ggplot2)
library(psych)
library(knitr)

Import

# Read the task data table and the image-category key;
# both CSV files start with a header row
Data <- read.csv(file = "FP_DataTable.csv", header = TRUE)
ImageCats <- read.csv(file = "ImageCats.csv", header = TRUE)

Tidy

# Recode missing data: every "NaN" entry becomes a true missing value (NA),
# not the literal string "NA", so numeric columns keep their type and no
# spurious "NA" category appears in factors
Data[Data == "NaN"] <- NA

# Set variable types
# ReportResp holds two response codes; in sorted order they are relabelled
# "look" and "regulate". Missed trials stay NA. factor() drops unused levels,
# and the levels<- assignment fails loudly if more than two codes are present.
Data$ReportResp <- factor(Data$ReportResp)
levels(Data$ReportResp) <- c("look", "regulate")

Data$RateResp <- as.numeric(Data$RateResp)
Data$RateRT <- as.numeric(Data$RateRT)
Data$RatingResp <- as.numeric(Data$RatingResp)
Data$RatingRT <- as.numeric(Data$RatingRT)

# Recode button box responses: subtract 4 from the raw response code
Data$RateResp <- Data$RateResp - 4

# Define conditions from the instruction / reported-response pair.
# Only the four valid pairings get a label; missed trials and trials where the
# report contradicts a cued instruction are not in the table and become NA.
condition_key <- c(
  looklook = "cued_look",
  regulateregulate = "cued_regulate",
  chooselook = "int_look",
  chooseregulate = "int_regulate"
)
condition_pair <- paste0(Data$Instruction, Data$ReportResp)

# Set Condition as a factor (NA entries do not become a level)
Data$Condition <- factor(unname(condition_key[condition_pair]))

Descriptives

# Keep only trials whose Condition label contains "int" or "cued"
# (cued_look, cued_regulate, int_look, int_regulate); NA trials are dropped
Data1 <- filter(Data, grepl("int|cued", Condition))

# Number of trials in each condition
count(group_by(Data1, Condition), Condition)
## Source: local data frame [4 x 2]
## 
##       Condition     n
##          (fctr) (int)
## 1     cued_look   654
## 2 cued_regulate   646
## 3      int_look   682
## 4  int_regulate   660
# Per-subject trial counts, restricted to the conditions whose label starts
# with "int" (intentional look / intentional regulate)
conditionNs <- Data1 %>%
  group_by(SubjectID) %>%
  count(SubjectID, Condition) %>%
  filter(grepl('^int', Condition))

# Boxplot of the per-subject counts, one box per condition
p <- ggplot(data = conditionNs, mapping = aes(x = factor(Condition), y = n))
p + geom_boxplot()

# One row per subject with a column per condition, plus the ratio of
# intentional look trials to intentional regulate trials
ratioNs <- conditionNs %>%
  group_by(SubjectID, Condition) %>%
  spread(key = Condition, value = n) %>%
  mutate(ratio = int_look / int_regulate)

# Histogram of the ratios, bars shaded by their count
p <- ggplot(data = ratioNs, mapping = aes(x = ratio))
p +
  geom_histogram(mapping = aes(fill = ..count..), binwidth = 0.1) +
  labs(title = "Distribution of ratios of intentional look trials to intentional regulate trials")

# Descriptive statistics of the task rating, separately for each condition
describeBy(x = Data1$RateResp, group = Data1$Condition)
## group: cued_look
##   vars   n mean  sd median trimmed  mad min max range  skew kurtosis   se
## 1    1 651 3.69 1.2      4    3.83 1.48   1   5     4 -0.69    -0.45 0.05
## -------------------------------------------------------- 
## group: cued_regulate
##   vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 645  2.4 1.06      2    2.32 1.48   1   5     4 0.61    -0.19 0.04
## -------------------------------------------------------- 
## group: int_look
##   vars   n mean   sd median trimmed  mad min max range  skew kurtosis   se
## 1    1 681 3.59 1.19      4     3.7 1.48   1   5     4 -0.57    -0.58 0.05
## -------------------------------------------------------- 
## group: int_regulate
##   vars   n mean   sd median trimmed  mad min max range skew kurtosis   se
## 1    1 655 2.45 1.02      2     2.4 1.48   1   5     4 0.43    -0.35 0.04
## -------------------------------------------------------- 
## group: NA
## NULL
# Boxplot of task ratings (RateResp) for each condition
p <- ggplot(data = Data1, mapping = aes(x = factor(Condition), y = RateResp))
p + geom_boxplot()
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

# Histogram of task ratings, one panel per condition (two rows of panels)
p <- ggplot(data = Data1, mapping = aes(x = RateResp))
p +
  geom_histogram(mapping = aes(fill = ..count..), binwidth = 1) +
  xlim(1, 5) +
  facet_wrap(~ Condition, nrow = 2) +
  labs(title = "Distribution of trials")

Visualize task data

# Split the combined condition label ("cued_look", "int_regulate", ...) of a
# per-condition summary table into a Cue column ("cued" / "intentional") and a
# look/regulate Condition factor. Labels are derived from each row's own
# Condition value, so they do not depend on the row order of the table.
split_condition <- function(summary_df) {
  label <- as.character(summary_df$Condition)
  summary_df$Cue <- ifelse(grepl("^cued", label), "cued", "intentional")
  summary_df$Condition <- as.factor(sub("^[^_]*_", "", label))
  summary_df
}

# In each summary below, n counts only the non-missing observations, matching
# the na.rm = TRUE used for the mean and sd; the standard error (sem) and the
# half-width of the 95% confidence interval (ci) are computed from that n.

# create plot.rate data frame for task ratings
plot.rate <- Data1 %>%
  group_by(Condition) %>%
  summarise(
    meanRateResp = mean(RateResp, na.rm = TRUE),
    sd = sd(RateResp, na.rm = TRUE),
    n = sum(!is.na(RateResp)),
    sem = sd(RateResp, na.rm = TRUE) / sqrt(n),
    ci = qt(0.975, df = n - 1) * sem
  ) %>%
  split_condition()

# create plot.rt data frame for reaction times
plot.rt <- Data1 %>%
  group_by(Condition) %>%
  summarise(
    meanRT = mean(RateRT, na.rm = TRUE),
    sd = sd(RateRT, na.rm = TRUE),
    n = sum(!is.na(RateRT)),
    sem = sd(RateRT, na.rm = TRUE) / sqrt(n),
    ci = qt(0.975, df = n - 1) * sem
  ) %>%
  split_condition()

# create plot.rating data frame for post-task ratings
plot.rating <- Data1 %>%
  group_by(Condition) %>%
  summarise(
    meanRating = mean(RatingResp, na.rm = TRUE),
    sd = sd(RatingResp, na.rm = TRUE),
    n = sum(!is.na(RatingResp)),
    sem = sd(RatingResp, na.rm = TRUE) / sqrt(n),
    ci = qt(0.975, df = n - 1) * sem
  ) %>%
  filter(Condition != 'NA') %>%
  split_condition()

# plot mean RateResp by Condition with error bars (mean +/- ci)
# The error-bar limits refer to columns of the plotted data frame by bare
# name, so ggplot evaluates them on the same rows it draws.
limits <- aes(ymax = meanRateResp + ci, ymin = meanRateResp - ci)
# Small horizontal offset so the two Cue lines and their bars do not overlap
dodge <- position_dodge(width = 0.1)

p <- ggplot(plot.rate, aes(x = Condition, y = meanRateResp, colour = Cue))
p + geom_line(aes(group = Cue), position = dodge, size = 1.5) +
    geom_errorbar(limits, position = dodge, width = 0.05, size = 1.5) +
    ylab("Mean Rating")
## ymax not defined: adjusting position using y instead

# plot mean RateRT by Condition with error bars (mean +/- ci)
# The error-bar limits refer to columns of the plotted data frame by bare
# name, so ggplot evaluates them on the same rows it draws.
limits <- aes(ymax = meanRT + ci, ymin = meanRT - ci)
# Small horizontal offset so the two Cue lines and their bars do not overlap
dodge <- position_dodge(width = 0.1)

p <- ggplot(plot.rt, aes(x = Condition, y = meanRT, colour = Cue))
p + geom_line(aes(group = Cue), position = dodge, size = 1.5) +
    geom_errorbar(limits, position = dodge, width = 0.05, size = 1.5) +
    ylab("Mean Rating RT")
## ymax not defined: adjusting position using y instead

# plot mean Rating by Condition with error bars (mean +/- ci)
# The error-bar limits refer to columns of the plotted data frame by bare
# name, so ggplot evaluates them on the same rows it draws.
limits <- aes(ymax = meanRating + ci, ymin = meanRating - ci)
# Small horizontal offset so the two Cue lines and their bars do not overlap
dodge <- position_dodge(width = 0.1)

p <- ggplot(plot.rating, aes(x = Condition, y = meanRating, colour = Cue))
p + geom_line(aes(group = Cue), position = dodge, size = 1.5) +
    geom_errorbar(limits, position = dodge, width = 0.05, size = 1.5) +
    ylab("Mean Post-Task Rating")
## ymax not defined: adjusting position using y instead

From Dani:

The main question is whether or not choosing to look or regulate depends on stimulus desirability. I wasn’t sure how to do this, but tried in the last graph. The x-axis is the task rating of desirability, the y-axis is the post-task rating of desirability and the groups are 2=look, 3=regulate, and missed trials (NA).



2 comments

  1. John Flournoy

    One possible solution to the data wrangling problem:

    # Reshape the category key to long form: every column except SubjectID
    # becomes a (catNumChar, catName) pair, and catNum is the number left
    # after removing "Cat" from the former column name
    imgCat.l <- ImageCats %>%
      mutate_each(funs(as.character)) %>%
      gather(key = catNumChar, value = catName, -SubjectID) %>%
      mutate(catNum = as.numeric(sub('Cat', '', catNumChar)))

    # Split each image file name into its word prefix and two-digit number,
    # then translate the prefix (first / second / third) into 1 / 2 / 3
    Data.1 <- Data %>%
      extract(
        col = Images,
        into = c('catNameChar', 'imageNum'),
        regex = '(\\w+)([0-9]{2})\\.jpg'
      ) %>%
      mutate(catNum = c(first = 1, second = 2, third = 3)[catNameChar])

    # Attach the category names (joined on the columns the two tables share)
    # and add a numeric code for each category name
    Data.ImageCats <- Data.1 %>%
      left_join(imgCat.l) %>%
      mutate(catNameNum = as.numeric(as.factor(catName)))

    • rosem@uoregon.edu

      Another, similar solution!

      # make Images into better names: split each image file name into its
      # leading letters (Order) and the digits that follow (num)
      Data <- Data %>%
        extract(col = Images, into = c("Order", "num"),
                regex = "([[:alpha:]]+)([[:digit:]]+)")

      # Reshape the category key to long form (one row per subject and former
      # column), keeping only the digits of the former column name in Order
      ImageCats <- ImageCats %>%
        gather(key = Order, value = SubCat, -SubjectID) %>%
        extract(col = Order, into = "Order", regex = "([[:digit:]]+)")

      # Translate the digit into the word used in the image file names;
      # any other value is left unchanged
      ImageCats$Order <- ifelse(ImageCats$Order == "1", "first",
        ifelse(ImageCats$Order == "2", "second",
          ifelse(ImageCats$Order == "3", "third", ImageCats$Order)))

      # Attach each subject's category to every trial and combine the category
      # with the image number into a single Image column
      Hooray <- left_join(Data, ImageCats, by = c("SubjectID", "Order")) %>%
        unite(Image, SubCat, num)