This project looks at data from the micro-campaign #endrainbow on Twitter. The aim of the campaign was to convince climate and earth-system
scientists to stop using rainbow-based colour palettes (such as MATLAB’s Jet, or R’s Spectral) as defaults in their scientific visualisations.
The campaign had some success, with big journals like Nature changing their editorial policies on colour palettes, although it’s difficult
to attribute these successes to the #endrainbow campaign.
Other useful data would include:
Counts of rainbow figures in journals through time.
Counts of journals with colour-palette policies through time.
Reproduce rainbow threshold game.
Useful link for text mining
Two spreadsheets, covering 2017–2019 and 2019–2023, converted to CSV from Google Sheets. Each row is a tweet.
There are four columns:
handle: User’s Twitter handle
text: Text of the tweet
url: URL of the tweet
time: Date and time of the tweet in UTC.
# Read one CSV export of #endrainbow tweets; column names are supplied here.
tweets <- read_csv(
  file = "data/end_rainbow_tweets.csv",
  col_names = c("handle", "text", "url", "time")
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## handle = col_character(),
## text = col_character(),
## url = col_character(),
## time = col_character()
## )
# Read the other CSV export, using the same four column names.
tweets1 <- read_csv(
  file = "data/end_rainbow_tweets_1.csv",
  col_names = c("handle", "text", "url", "time")
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## handle = col_character(),
## text = col_character(),
## url = col_character(),
## time = col_character()
## )
# Stack the two tweet tables row-wise, then count the distinct handles.
tweets_merged <- rbind(tweets, tweets1)
sum(!duplicated(tweets_merged[["handle"]]))
## [1] 1921
# Tally tweets per handle and show the 20 largest counts.
head(
  sort(table(tweets_merged[["handle"]]), decreasing = TRUE),
  n = 20
)
##
## @fcrameri @dougmcneall @kennethfield @AchimZeileis
## 72 54 40 38
## @RetoStauffer2 @AndyDoggerBank @opensource_orgs @philipheron
## 27 24 23 22
## @ed_hawkins @jscarto @rstatstweet @michistoelzle
## 19 19 18 16
## @threadreaderapp @JamesPope10 @My_Carta @BetterFigures
## 16 15 15 14
## @ShepGracie @CEEDOslo @kwinkunks @obspy
## 14 13 12 12
Convert the odd time format into something R understands
# Parse the month-day-year hour:minute strings into date-times.
tweet_time <- mdy_hm(tweets_merged[["time"]])

# Draw axis labels horizontally.
par(las = 1)

# Monthly tweet counts; axes are suppressed here and drawn explicitly below.
tweet_hist <- hist(
  x = tweet_time,
  breaks = "months",
  freq = TRUE,
  axes = FALSE,
  col = "lightblue",
  border = "lightblue",
  main = "#endrainbow tweets per month",
  xlab = ""
)

# Time axis along the bottom, count axis on the left.
Axis(x = tweet_time, side = 1, col = "black")
Axis(side = 2, col = "black")
# Word cloud of the raw tweet text, capped at 50 words.
library(wordcloud)
wcl <- wordcloud(words = tweets_merged[["text"]], max.words = 50)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
# Wrap the tweet text in a tm corpus (one document per tweet text entry).
words <- tweets_merged[["text"]]
corpus <- tm::Corpus(
  tm::VectorSource(words)
)
# Strip punctuation from every document.
corpus <- tm::tm_map(corpus, tm::removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
# Lower-case the text before dropping stop words: tm::removeWords() matches
# case-sensitively and tm::stopwords() is all lower case, so capitalised stop
# words ("The", "This", ...) would otherwise stay in the corpus and be counted
# once the text is lower-cased later on.
corpus <- tm::tm_map(corpus, tm::content_transformer(tolower))
# Remove the default stop-word list from every document.
corpus <- tm::tm_map(corpus, tm::removeWords, tm::stopwords())
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents
# Spot-check the cleaned text of the first document in the corpus.
corpus[[1]]$content
## [1] "dougmcneall hashtag tracking test endrainbow"
#head(sort(termFreq(tweets_merged$text), decreasing = TRUE), 10)
# Term counts over the cleaned corpus text, showing the 20 largest.
# `corpus$content` is kept as-is: `[[` on a tm corpus selects a document.
head(
  sort(termFreq(corpus$content), decreasing = TRUE),
  n = 20
)
## endrainbow color colour palettes dataviz
## 2953 1223 1064 870 822
## fcrameri rstats achimzeileis rainbow new
## 706 567 562 554 522
## colorspace colors pkg scientific usebatlow
## 447 439 436 432 404
## release much poster visualisation vision
## 344 343 342 334 305
# Term-by-document and document-by-term matrices built from the cleaned corpus.
tdm <- TermDocumentMatrix(x = corpus)
dtm <- DocumentTermMatrix(x = corpus)

# Terms whose frequency is at least 100.
# NOTE(review): this variable shares its name with stats::fft().
fft <- findFreqTerms(x = tdm, lowfreq = 100)

# Most frequent terms, with n = 10.
fmft <- findMostFreqTerms(x = dtm, n = 10)
#termFreq()