This project looks at data from the micro-campaign #endrainbow on Twitter. The aim of the campaign was to convince climate and earth-system
scientists to stop using rainbow-based colour palettes (such as MATLAB’s Jet, or R’s Spectral) as defaults in their scientific visualisations.

The campaign had some success, with big journals like Nature changing their editorial policies on colour palettes, although it’s difficult
to attribute these successes to the #endrainbow campaign.

Other useful data would include:
Counts of rainbow figures in journals through time.
Counts of journals with colour-palette policies through time.
A reproduction of the rainbow threshold game.

Useful link for text mining

https://www.r-bloggers.com/2021/03/text-mining-term-frequency-analysis-and-word-cloud-creation-using-the-tm-package/

Load data

Two spreadsheets, covering 2017 - 2019 and 2019 - 2023 converted to csv from google sheets. Each row is a tweet.

There are four columns:

handle: User’s Twitter handle
text: Text of the tweet
url: URL of the tweet
time: Date and time of the tweet in UTC.

# First export (2017 - 2019). Column names are supplied explicitly, so the
# csv is assumed to have no header row -- TODO confirm against the raw file.
# read_csv() is readr; presumably library(tidyverse)/library(readr) is
# attached earlier in the document -- verify.
tweets <- read_csv('data/end_rainbow_tweets.csv', col_names = c('handle', 'text', 'url', 'time'))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   handle = col_character(),
##   text = col_character(),
##   url = col_character(),
##   time = col_character()
## )
# Second export (2019 - 2023), same four-column layout as the first.
tweets1 <- read_csv('data/end_rainbow_tweets_1.csv', col_names = c('handle', 'text', 'url', 'time'))
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   handle = col_character(),
##   text = col_character(),
##   url = col_character(),
##   time = col_character()
## )
# Stack the two periods into one table; both frames share identical columns.
tweets_merged <- rbind(tweets, tweets1)

How many unique users tweeted?

# Count distinct handles: duplicated() flags every repeat appearance,
# so the non-duplicated entries are exactly the unique tweeters.
sum(!duplicated(tweets_merged$handle))
## [1] 1921

Top 20 #endrainbow tweeters

# Tweets per handle, most active first; then keep the top 20.
handle_counts <- sort(table(tweets_merged$handle), decreasing = TRUE)
head(handle_counts, 20)
## 
##        @fcrameri     @dougmcneall    @kennethfield    @AchimZeileis 
##               72               54               40               38 
##   @RetoStauffer2  @AndyDoggerBank @opensource_orgs     @philipheron 
##               27               24               23               22 
##      @ed_hawkins         @jscarto     @rstatstweet   @michistoelzle 
##               19               19               18               16 
## @threadreaderapp     @JamesPope10        @My_Carta   @BetterFigures 
##               16               15               15               14 
##      @ShepGracie        @CEEDOslo       @kwinkunks           @obspy 
##               14               13               12               12

Convert the odd time format into something R understands

tweet_time <- mdy_hm(tweets_merged$time)

A histogram of #endrainbow tweets per month

# Horizontal axis tick labels (las = 1) for easier reading.
par(las = 1)
# Monthly-binned histogram of tweet activity. Axes are suppressed here
# (axes = FALSE) and drawn explicitly below so the x axis gets
# date-aware labels.
tweet_hist <- hist(tweet_time, 
                   breaks = 'months',
                   freq = TRUE,
                   main = "#endrainbow tweets per month",
                   xlab = '',
                   col = 'lightblue',
                   axes = FALSE,
                   border = 'lightblue'
)
# Generic Axis() dispatches on the class of tweet_time (POSIXct),
# producing readable date labels on the x axis.
Axis(tweet_time, side = 1, col = 'black')
# Default numeric y axis (tweet counts per month).
Axis(side = 2, col = 'black')

## Build a wordcloud

library(wordcloud)

# Quick wordcloud straight from the raw tweet text. wordcloud() does its
# own tm-based cleaning internally, which is what emits the
# "transformation drops documents" warnings below.
# NOTE(review): wordcloud() is called for its plot side effect; the wcl
# assignment does not appear to capture anything useful.
wcl <- wordcloud(tweets_merged$text, max.words = 50)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

Create a corpus and remove stopwords (it, the, and etc.) and punctuation (including handles).

# One corpus document per tweet.
words <- tweets_merged$text

corpus <- tm::Corpus(tm::VectorSource(words))

# Strip punctuation first, so e.g. "@handle" collapses to "handle".
corpus <- tm::tm_map(corpus, tm::removePunctuation)
## Warning in tm_map.SimpleCorpus(corpus, tm::removePunctuation): transformation
## drops documents

# Then drop English stopwords ("it", "the", "and", ...).
corpus <- tm::tm_map(corpus, function(x) tm::removeWords(x, tm::stopwords()))
## Warning in tm_map.SimpleCorpus(corpus, function(x) tm::removeWords(x,
## tm::stopwords())): transformation drops documents

# Spot-check the first cleaned document.
corpus[[1]]$content
## [1] "dougmcneall hashtag tracking test endrainbow"

Most common words in the corpus

# Alternative: frequencies over the raw (uncleaned) tweet text.
#head(sort(termFreq(tweets_merged$text), decreasing = TRUE), 10)

# Term frequencies over the cleaned corpus; corpus$content holds the
# cleaned text (see the spot-check above).
# NOTE(review): termFreq is called unqualified here, so the tm package is
# presumably attached (likely pulled in via library(wordcloud)) -- confirm.
head(sort(termFreq(corpus$content), decreasing = TRUE), 20)
##    endrainbow         color        colour      palettes       dataviz 
##          2953          1223          1064           870           822 
##      fcrameri        rstats  achimzeileis       rainbow           new 
##           706           567           562           554           522 
##    colorspace        colors           pkg    scientific     usebatlow 
##           447           439           436           432           404 
##       release          much        poster visualisation        vision 
##           344           343           342           334           305

More corpus transformations

# Term-document matrix: terms in rows, documents (tweets) in columns.
tdm <- TermDocumentMatrix(corpus)

# The transposed orientation: documents in rows, terms in columns.
dtm <- DocumentTermMatrix(corpus)

# Terms occurring at least 100 times across the corpus.
fft <- findFreqTerms(tdm, lowfreq = 100)

# Presumably the 10 most frequent terms for each document -- confirm
# against tm::findMostFreqTerms documentation.
fmft <- findMostFreqTerms(dtm,10)

#termFreq()