About


# Set the LANG environment variable so that R reports messages in English.
Sys.setenv(LANG = "en")
#library("rstudioapi") #to grab local position of the script
#setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
# Have knitr evaluate the chunks relative to the current directory.
knitr::opts_knit$set(root.dir = '.')

#library("rvest") # to handle html stuff

library(lubridate) # to handle dates

library(ggplot2) # for plotting
library(cowplot) # for plotting
library(RColorBrewer) # for choosing colors

# Eight-colour 'Dark2' palette shared by the fill/colour scales below.
custompalette <- brewer.pal(n=8, name = 'Dark2')

library(knitr) # for tables
library(kableExtra) # for tables

library(lubridate) # for dates -- NOTE(review): already attached above, this call is redundant

library(plyr) # ddply, to summarize number of words by author

# Restores the scraped data; presumably this is where `worksData` (used below,
# but never created in this file) comes from -- TODO confirm.
load('LOK_worksData.RData')

This document details an analysis of Avatar: Legend of Korra Ao3 tag data, collected on 10 Aug 2020. I haven’t figured out a way to get my scraper to log into Ao3 (yet? rvest seems to have some trouble with page redirects), so the results here are based on the works visible without authentication, which likely preferentially filters out explicit/problematic works from the selection.


# Bar chart of the number of rows (works) per value of one column.
#
# Args:
#   data:           data frame to plot.
#   columnX:        name of the column mapped to the x axis, as a string
#                   (hence aes_string()).
#   legendPosition: forwarded to theme(legend.position = ...). Defaults to
#                   "right". This argument used to be accepted but ignored.
#
# Returns: a ggplot object.
plot_bar <- function (data, columnX, legendPosition = "right") {
    ggplot(data, aes_string(x = columnX)) +
    geom_bar(alpha = 1) +
    theme_half_open() +
    background_grid() +
    theme(legend.title = element_blank(),
          legend.position = legendPosition,
          axis.title.x = element_blank(),
          # vertical tick labels so long category names stay readable
          axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
    labs(y = "Number of works")
}

# Stacked bar chart of the number of rows (works) per value of one column,
# with the bars split and filled by a second column.
#
# Args:
#   data:           data frame to plot.
#   columnX:        name of the column mapped to the x axis (string).
#   colColor:       name of the column mapped to the fill colour (string).
#   legendPosition: forwarded to theme(legend.position = ...). Defaults to
#                   "right". This argument used to be accepted but ignored.
#
# Fill colours are taken from the global `custompalette`.
#
# Returns: a ggplot object.
plot_bar_color <- function (data, columnX, colColor, legendPosition = "right") {
    ggplot(data, aes_string(x = columnX, fill = colColor)) +
    geom_bar(alpha = 0.7) +
    scale_fill_manual(values = custompalette) +
    theme_half_open() +
    background_grid() +
    theme(legend.title = element_blank(),
          legend.position = legendPosition,
          axis.title.x = element_blank(),
          # vertical tick labels so long category names stay readable
          axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
    labs(y = "Number of works")
}

# Column chart of one numeric column against a categorical column.
# geom_col() stacks rows that share an x value, so each column's height is
# the sum of `columnY` within that x value.
#
# Args:
#   data:           data frame to plot.
#   columnX:        name of the column mapped to the x axis (string).
#   columnY:        name of the column mapped to the y axis (string); dots in
#                   the name are shown as spaces in the axis title.
#   legendPosition: forwarded to theme(legend.position = ...). Defaults to
#                   "right". This argument used to be accepted but ignored.
#
# Returns: a ggplot object.
plot_col <- function (data, columnX, columnY, legendPosition = "right") {
    ggplot(data, aes_string(x = columnX, y = columnY)) +
    geom_col(alpha = 1) +
    theme_half_open() +
    background_grid() +
    theme(legend.title = element_blank(),
          legend.position = legendPosition,
          axis.title.x = element_blank(),
          # vertical tick labels so long category names stay readable
          axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
    labs(y = gsub('\\.', ' ', columnY))
}

# Stacked column chart of one numeric column against a categorical column,
# with the stacks filled by a third column.
# geom_col() stacks rows that share an x value, so each column's height is
# the sum of `columnY` within that x value.
#
# Args:
#   data:           data frame to plot.
#   columnX:        name of the column mapped to the x axis (string).
#   columnY:        name of the column mapped to the y axis (string); dots in
#                   the name are shown as spaces in the axis title.
#   colColor:       name of the column mapped to the fill colour (string).
#   legendPosition: forwarded to theme(legend.position = ...), e.g. "none" to
#                   hide the legend. Defaults to "right". This argument used
#                   to be accepted but ignored.
#
# Fill colours are taken from the global `custompalette`.
#
# Returns: a ggplot object.
plot_col_color <- function (data, columnX, columnY, colColor, legendPosition = "right") {
    ggplot(data, aes_string(x = columnX, y = columnY, fill = colColor)) +
    geom_col(alpha = 0.7) +
    scale_fill_manual(values = custompalette) +
    theme_half_open() +
    background_grid() +
    theme(legend.title = element_blank(),
          legend.position = legendPosition,
          axis.title.x = element_blank(),
          # vertical tick labels so long category names stay readable
          axis.text.x = element_text(angle = 90, vjust = 1, hjust = 1)) +
    labs(y = gsub('\\.', ' ', columnY))
}

# Scatter plot of a metric against a percentile column, with the metric on a
# log10 y axis (breaks at powers of ten) and x breaks at 0/25/50/75/100.
#
# Args:
#   data:           data frame to plot.
#   columnX:        name of the percentile column (string); dots in the name
#                   are shown as spaces in the axis title.
#   columnY:        name of the metric column (string). Values must be
#                   positive to be drawn on the log scale.
#   legendPosition: forwarded to theme(legend.position = ...). Defaults to
#                   "right". This argument used to be accepted but ignored.
#
# Returns: a ggplot object.
plot_percentiles <- function (data, columnX, columnY, legendPosition = "right") {
    ggplot(data, aes_string(x = columnX, y = columnY)) +
    geom_point(alpha = 0.3) +
    scale_y_log10(breaks = 10^c(0:15)) +
    scale_x_continuous(breaks = c(0, 25, 50, 75, 100)) + #scale_x_continuous(breaks = c(0:10)*10)+
    theme_half_open() +
    background_grid() +
    theme(legend.title = element_blank(),
          legend.position = legendPosition) +
    labs(x = gsub('\\.', ' ', columnX))
}
# Pull each field out of `worksData` into its own list, one element per work.
# NOTE(review): assumes every element of worksData is a list-like record with
# these named fields -- confirm against the scraper output.
#title <- lapply(worksData, function(x) {x$Title})
author <- lapply(worksData, function(x) {x$Author})
fandom <- lapply(worksData, function(x) {x$Fandom})
rating <- lapply(worksData, function(x) {x$Rating})
warnings <- lapply(worksData, function(x) {x$Warnings})
category <- lapply(worksData, function(x) {x$Category})
WIP <- lapply(worksData, function(x) {x$WIP})
date <-lapply(worksData, function(x) {x$Date})
relationships <-lapply(worksData, function(x) {x$Relationships})
character <-lapply(worksData, function(x) {x$Character})
freeform <-lapply(worksData, function(x) {x$Freeform})
language <-lapply(worksData, function(x) {x$Language})
# For the numeric fields, entries flagged by is.na() are replaced with 0.
words <-lapply(worksData, function(x) {x$Words})
words[is.na(words)] <- 0
kudos <-lapply(worksData, function(x) {x$Kudos})
kudos[is.na(kudos)] <- 0
comments <-lapply(worksData, function(x) {x$Comments})
comments[is.na(comments)] <- 0
bookmarks<-lapply(worksData, function(x) {x$Bookmarks})
bookmarks[is.na(bookmarks)] <- 0
hits <-lapply(worksData, function(x) {x$Hits})
hits[is.na(hits)] <- 0

# One row per work. The columns are built from the lists above, so they must
# all flatten to the same length.
# NOTE(review): Words is flattened with unlist() while the other counters go
# through as.numeric(as.character(...)) -- presumably they are stored
# differently in worksData; confirm.
stats <- data.frame(Words = unlist(words, recursive = FALSE),
                    Comments= as.numeric(as.character(comments)),
                    Kudos = as.numeric(as.character(kudos)),
                    Bookmarks = as.numeric(as.character(bookmarks)),
                    Hits = as.numeric(as.character(hits)),
                    WIP = unlist(WIP, recursive = FALSE),
                    Rating = unlist(rating, recursive = FALSE),
                    Date = do.call("c", date))

# Fix the order in which ratings appear on axes and in legends. Values not
# listed in `levels` become NA.
stats$Rating <- factor(stats$Rating, levels = c("Not Rated", "General Audiences", "Teen And Up Audiences", "Mature", "Explicit"))

# Quantiles of every metric at probabilities 1/total, 2/total, ..., 1.
# Works.Percentile is 100 * (1 - probability), so the highest quantile sits at
# x = 0. Every quantile is shifted by +1 so that zeros can still be drawn on
# the log-scaled y axis used by plot_percentiles().
total <- 1000
percentile <- c(1:total)
percentileData <- data.frame(Works.Percentile = 100*(total - percentile)/total,
                             Words = unlist(lapply(percentile/total, quantile, x = unlist(words) )) + 1,
                             Hits = unlist(lapply(percentile/total, quantile, x = unlist(hits) )) + 1,
                             Kudos = unlist(lapply(percentile/total, quantile, x = unlist(kudos) )) + 1,
                             Comments = unlist(lapply(percentile/total, quantile, x = unlist(comments) )) + 1,
                             Bookmarks = unlist(lapply(percentile/total, quantile, x = unlist(bookmarks) )) + 1 )

# Drop the lists that are not referenced again; words, author, category,
# warnings, etc. are kept because later sections still use them.
rm(rating, kudos, comments, bookmarks, hits)

Timeline

Solid vertical lines on the graph indicate initial air dates, and dashed indicate final air dates, according to Wiki article.

For The Dragon Prince (TDP) we saw a peak of activity after each season, but here it’s not quite the case. After season 1 there’s a peak, which lasts for almost a year, unlike the 2 months we saw in case of TDP. TDP is released via Netflix where the entire season is released all at once, while The Legend of Korra (LOK) has been steadily gaining momentum during its 2-month-long air period.

During and after the season 2 release we observe a minor dip, but there’s growth in the months after. While it was critically acclaimed, season 2 was not as well received by the fandom, and it’s possible that after its conclusion many people took to writing their own personal interpretations.

Season 3 and 4, released in rapid succession fall onto that upward trend, building up to a massive peak which takes approximately 2 years to fall back to pre-season 3 levels.

At around 2017 the popularity steadies at the level of approximately season 1 peak, up until new recent upward growth. It’s hard to say for sure, but this new peak seems to fall approximately onto coronavirus lockdown and release of Avatar: the Last Airbender (ATLA) on US Netflix in May 2020.

Again, some works have been published even before the initial air date of season 1, which could possibly be attributed to ATLA popularity and trailer/Comic Cons hype.


#data$Timestamp <- parse_date_time2(as.character(data$Timestamp), orders = "%d/%m/%Y %H:%M:%S")
#data$day <- as.Date(data$Timestamp)

# First and last air date of each of the four seasons; drawn as solid and
# dashed vertical lines respectively (also reused by the later timeline plots).
seasonsStart <- as.Date(c("2012-04-14", "2013-09-13", "2014-06-27", "2014-10-03"))
seasonsEnd <- as.Date(c("2012-06-23", "2013-11-22", "2014-08-22", "2014-12-19"))

# Density of work dates over the whole data set.
plotDatesDensityTotal <- ggplot(data = stats, mapping = aes(x = Date)) +
  geom_density(alpha = 0.1) +
  geom_vline(xintercept = seasonsStart) +
  geom_vline(xintercept = seasonsEnd, linetype = "longdash") +
  scale_x_date(date_breaks = "6 months") +
  theme_half_open() +
  background_grid() +
  theme(
    legend.position = 'right',
    axis.text.x = element_text(hjust = 1, vjust = 0.5, angle = 90)
  )
plotDatesDensityTotal


rm(plotDatesDensityTotal)

I collect data from the Ao3 search pages (rather than the individual work pages, as it’s less disruptive to the site’s operation), so I don’t have access to the initial posting dates, only the latest updates. This means that the upward trend in works over time could be an artifact of the series getting more popular, but could also be attributed to multichapter works drifting forward in time due to updates.

If we plot Complete Works and Works in Progress separately, we still observe an overall upward trend, but due to the structure of release dates it’s a bit difficult to draw any conclusions about multichapter drift. Interestingly, if we focus on the works in progress, we see that the current peak seems to start earlier, which may be attributed to 10 year anniversary of ATLA Bluray release in June 2018.


# Density of work dates, one curve per WIP status, with the season start
# (solid) and end (dashed) dates marked.
plotDatesDensity <- ggplot(data = stats, mapping = aes(x = Date, col = WIP)) +
  geom_density(alpha = 0.1) +
  geom_vline(xintercept = seasonsStart) +
  geom_vline(xintercept = seasonsEnd, linetype = "longdash") +
  scale_x_date(date_breaks = "6 months") +
  theme_half_open() +
  background_grid() +
  theme(
    legend.position = 'right',
    axis.text.x = element_text(hjust = 1, vjust = 0.5, angle = 90)
  )
plotDatesDensity


rm(plotDatesDensity)

Engagement percentiles

Small plotting cheat: all the numbers on the Y axis are increased by 1 to include the case of 0 into the plot (otherwise excluded because of log scale).

# One percentile plot per metric, in the order they appear in the grid.
percentileMetrics <- c('Words', 'Hits', 'Kudos', 'Comments', 'Bookmarks')
percentilePlots <- lapply(percentileMetrics, function(metric) {
  plot_percentiles(percentileData, 'Works.Percentile', metric, 'right')
})
names(percentilePlots) <- percentileMetrics

# Panels are drawn without legends; the sixth grid cell holds whatever
# get_legend() extracts from the Kudos plot.
percentilePanels <- lapply(percentilePlots, function(panel) {
  panel + theme(legend.position="none")
})
percentileLegend <- get_legend(percentilePlots[['Kudos']] +
                               theme(legend.title=element_blank()))

# c(..., list(x)) keeps the sixth slot even if the extracted legend is NULL.
plot_grid(plotlist = c(unname(percentilePanels), list(percentileLegend)))


rm(total, percentile, percentileData, percentileMetrics, percentilePlots, percentilePanels, percentileLegend)

Complete Work vs Work in Progress distributions


# Engagement split by WIP status.
#
# Each row's metric is divided by the number of works that share its WIP
# status. plot_col() uses geom_col(), which stacks rows with the same x value,
# so every column ends up showing the per-group mean of the metric.
statsWIP <- stats

# Group sizes are tabulated once and looked up by value. The previous version
# called summary() twice for every row and only worked when WIP was a factor.
wipCounts <- table(statsWIP$WIP)
statsWIP$Divisor <- as.vector(wipCounts[as.character(statsWIP$WIP)])

statsWIP$Words.per.Work <- statsWIP$Words/statsWIP$Divisor
statsWIP$Hits.per.Work <- statsWIP$Hits/statsWIP$Divisor
statsWIP$Kudos.per.Work <- statsWIP$Kudos/statsWIP$Divisor
statsWIP$Comments.per.Work <- statsWIP$Comments/statsWIP$Divisor
statsWIP$Bookmarks.per.Work <- statsWIP$Bookmarks/statsWIP$Divisor

barWorksWIP <- plot_bar(statsWIP, 'WIP', 'right')
barWordsWIP <- plot_col(statsWIP, 'WIP', 'Words.per.Work', 'right')
barHitsWIP <- plot_col(statsWIP, 'WIP', 'Hits.per.Work', 'right')
barKudosWIP <- plot_col(statsWIP, 'WIP', 'Kudos.per.Work', 'right')
barCommentsWIP <- plot_col(statsWIP, 'WIP', 'Comments.per.Work', 'right')
barBookmarksWIP <- plot_col(statsWIP, 'WIP', 'Bookmarks.per.Work', 'right')

# plot_grid(plot_grid( barWorksWIP + theme(legend.position="none"),
#                      barWordsWIP + theme(legend.position="none"),
#                      barHitsWIP + theme(legend.position="none"),
#                      barKudosWIP + theme(legend.position="none"),
#                      barCommentsWIP + theme(legend.position="none"),
#                      barBookmarksWIP + theme(legend.position="none"),
#                      align = 'hv'),
#           get_legend(barWorksWIP + theme(legend.title=element_blank())),
#           rel_widths = c(4,1),
#           align = 'hv')
plot_grid( barWorksWIP + theme(legend.position="none"),
           barWordsWIP + theme(legend.position="none"),
           barHitsWIP + theme(legend.position="none"),
           barKudosWIP + theme(legend.position="none"),
           barCommentsWIP + theme(legend.position="none"),
           barBookmarksWIP + theme(legend.position="none"),
           align = 'hv')


rm(statsWIP, wipCounts, barWorksWIP, barWordsWIP, barHitsWIP, barKudosWIP, barCommentsWIP, barBookmarksWIP)

Rating distributions


# Engagement split by rating.
#
# Each row's metric is divided by the number of works that share its rating.
# plot_col() uses geom_col(), which stacks rows with the same x value, so
# every column ends up showing the per-rating mean of the metric.
statsRating <- stats

# Group sizes are tabulated once and looked up by value. The previous version
# called summary() twice for every row.
ratingCounts <- table(statsRating$Rating)
statsRating$Divisor <- as.vector(ratingCounts[as.character(statsRating$Rating)])

statsRating$Words.per.Work <- statsRating$Words/statsRating$Divisor
statsRating$Hits.per.Work <- statsRating$Hits/statsRating$Divisor
statsRating$Kudos.per.Work <- statsRating$Kudos/statsRating$Divisor
statsRating$Comments.per.Work <- statsRating$Comments/statsRating$Divisor
statsRating$Bookmarks.per.Work <- statsRating$Bookmarks/statsRating$Divisor

barWorksRating <- plot_bar(statsRating, 'Rating', 'right')
barWordsRating <- plot_col(statsRating, 'Rating', 'Words.per.Work', 'right')
barHitsRating <- plot_col(statsRating, 'Rating', 'Hits.per.Work', 'right')
barKudosRating <- plot_col(statsRating, 'Rating', 'Kudos.per.Work', 'right')
barCommentsRating <- plot_col(statsRating, 'Rating', 'Comments.per.Work', 'right')
barBookmarksRating <- plot_col(statsRating, 'Rating', 'Bookmarks.per.Work', 'right')

plot_grid( barWorksRating + theme(legend.position="none"),
           barWordsRating + theme(legend.position="none"),
           barHitsRating + theme(legend.position="none"),
           barKudosRating + theme(legend.position="none"),
           barCommentsRating + theme(legend.position="none"),
           barBookmarksRating + theme(legend.position="none"),
           align = 'hv')


rm(statsRating, ratingCounts, barWorksRating, barWordsRating, barHitsRating, barKudosRating, barCommentsRating, barBookmarksRating)

Categories

There are 8129 works tagged with a single category, and 1505 tagged with 2 or more (up until all 6).

‘F/F’ is the most popular category, followed by ‘F/M’, ‘Gen’, and ‘M/M’.

Multiple category fics strongly contribute towards ‘F/M’ count, then to ‘F/F’, ‘Gen’, and ‘M/M’, and only marginally to ‘Multi’ and ‘Other’.


# Category labels, in the order they are shown on the x axis.
categoryLevels <- c('Gen', 'F/F', 'F/M', 'M/M', 'Multi', 'Other', 'No category')

# Left facet: works tagged with exactly one category, counted per category.
hasSingleCategory <- lengths(category) == 1
singleCategoryCounts <- summary(as.factor(unlist(category[hasSingleCategory])))
singleCategorySummary <- data.frame(Category = names(singleCategoryCounts),
                                    Number.of.Works = singleCategoryCounts)
singleCategorySummary$Split <- "Single category"

# Right facet: every work whose category field matches the label, so a work
# with several categories is counted once under each of them.
allCategoryCounts <- vapply(categoryLevels,
                            function(label) sum(grepl(label, category)),
                            integer(1),
                            USE.NAMES = FALSE)
multipleCategorySummary <- data.frame(Category = categoryLevels,
                                      Number.of.Works = allCategoryCounts)
multipleCategorySummary$Split <- "All works"

categorySummary <- rbind(singleCategorySummary, multipleCategorySummary)
categorySummary$Category <- factor(categorySummary$Category, levels = categoryLevels)
categorySummary$Split <- factor(categorySummary$Split, levels = c("Single category", "All works"))

plotCategories <- ggplot(data = categorySummary,
                         mapping = aes(x = Category, y = Number.of.Works)) +
  geom_col(alpha = 1) +
  theme_half_open() +
  background_grid() +
  facet_wrap(.~Split) +
  theme(axis.text.x = element_text(hjust = 1, vjust = 1, angle = 90),
        axis.title.x = element_blank(),
        legend.title = element_blank()) +
  labs(y = "Number of Works")
plotCategories


rm(categoryLevels, hasSingleCategory, singleCategoryCounts, allCategoryCounts,
   singleCategorySummary, multipleCategorySummary, categorySummary, plotCategories)

Engagement by a single category

For simplicity I’m only looking at works tagged with a single category here.

“Multi” seems to have most words, despite being a rather small category, and collects quite a bit of Hits, Kudos, Comments and Bookmarks. It’s possible that a number of those works are collections of stories for many fandoms, which amplifies the engagement numbers.

Overall, “F/F” category works collect twice as many hits, kudos, and bookmarks as the next category, and more than thrice as many comments as “M/M” works.


# Engagement for works tagged with exactly one category.
#
# `statsCategory` is kept after this chunk because the following sections
# (rating percentages, category timeline) reuse it.
isSingleCategory <- lengths(category) == 1
statsCategory <- stats[isSingleCategory,]
statsCategory$Category <- factor(unlist(category[isSingleCategory]),
                                 levels = c('Gen', 'F/F', 'F/M', 'M/M', 'Multi', 'Other', 'No category'))

# Number of works per category, tabulated once and looked up by value. The
# previous version called summary() twice for every row.
categoryCounts <- table(statsCategory$Category)
statsCategory$Divisor <- as.vector(categoryCounts[as.character(statsCategory$Category)])

# Dividing by the group size makes the stacked columns drawn by geom_col()
# add up to the per-category mean; the fill then shows each rating's share.
statsCategory$Words.per.Work <- statsCategory$Words/statsCategory$Divisor
statsCategory$Hits.per.Work <- statsCategory$Hits/statsCategory$Divisor
statsCategory$Kudos.per.Work <- statsCategory$Kudos/statsCategory$Divisor
statsCategory$Comments.per.Work <- statsCategory$Comments/statsCategory$Divisor
statsCategory$Bookmarks.per.Work <- statsCategory$Bookmarks/statsCategory$Divisor
# Each work's share of its category, so stacked shares sum to 1 per category.
statsCategory$Works.Percent <- 1/statsCategory$Divisor

rm(isSingleCategory, categoryCounts)

barWorksCategory <- plot_bar_color(statsCategory, 'Category', 'Rating', 'right')
barWordsCategory <- plot_col_color(statsCategory, 'Category', 'Words.per.Work', 'Rating', 'right')
barHitsCategory <- plot_col_color(statsCategory, 'Category', 'Hits.per.Work', 'Rating', 'right')
barKudosCategory <- plot_col_color(statsCategory, 'Category', 'Kudos.per.Work', 'Rating', 'right')
barCommentsCategory <- plot_col_color(statsCategory, 'Category', 'Comments.per.Work', 'Rating', 'right')
barBookmarksCategory <- plot_col_color(statsCategory, 'Category', 'Bookmarks.per.Work','Rating', 'right')

# Six legend-free panels on the left, one shared legend on the right.
plot_grid(plot_grid( barWorksCategory + theme(legend.position="none"),
           barWordsCategory + theme(legend.position="none"),
           barHitsCategory + theme(legend.position="none"),
           barKudosCategory + theme(legend.position="none"),
           barCommentsCategory + theme(legend.position="none"),
           barBookmarksCategory + theme(legend.position="none"),
           align = 'hv'),
          get_legend(barWorksCategory + theme(legend.title=element_blank())),
          rel_widths = c(4,1))

Ratings percentages by a single category

Out of the 3 main shipping categories, in absolute numbers “F/F” has most E rated works, and “M/M” has the least. However, in relative amounts “M/M” category has more explicit works (26%) than either “F/F” or “F/M”. Overall, the distributions of ratings between the categories are much more balanced than in TDP, perhaps, reflecting a variety of highly plot relevant older female characters in the cast.


# Share of each rating within every single category: one facet per category,
# y axis formatted as a percentage.
plotWorksCategoryNormalized <- plot_col_color(data = statsCategory,
                                              columnX = 'Rating',
                                              columnY = 'Works.Percent',
                                              colColor = 'Rating',
                                              legendPosition = 'none')
plotWorksCategoryNormalized <- plotWorksCategoryNormalized +
  scale_y_continuous(labels = scales::percent) +
  facet_wrap(.~Category)
plotWorksCategoryNormalized


rm(barWorksCategory, barWordsCategory, barHitsCategory, barKudosCategory,
   barCommentsCategory, barBookmarksCategory, plotWorksCategoryNormalized)

Single Category through time

Seasons 3 and 4 of LOK brought a huge rise in ‘F/F’ category popularity. There’s a smaller peak around summer 2017 which may be due to release of “The Legend of Korra: Turf Wars – Part One” in July 2017.


# Density of work dates for single-category works, one curve per category,
# with the season start (solid) and end (dashed) dates marked.
plotDatesRatingDensity <- ggplot(data = statsCategory,
                                 mapping = aes(x = Date, col = Category)) +
  geom_density(alpha = 0.1) +
  geom_vline(xintercept = seasonsStart) +
  geom_vline(xintercept = seasonsEnd, linetype = "longdash") +
  scale_x_date(date_breaks = "6 months") +
  scale_color_manual(values = custompalette) +
  theme_half_open() +
  background_grid() +
  theme(
    legend.position = 'right',
    axis.text.x = element_text(hjust = 1, vjust = 0.5, angle = 90)
  )
plotDatesRatingDensity


rm(plotDatesRatingDensity)

Ship tags through time

Seasons 3 and 4 brought a peak of popularity to “Korra/Asami Sato”, but also coincided with the peaks for most of the other top ships, excluding perhaps only “Korra/Mako (Avatar)”, which experienced peaks around season 1 and 3. It’s perhaps worth noting “Lin Beifong/Kya II” experiencing a slightly later peak after seasons 3-4, and then another one starting around summer 2017, which is likely due to “The Legend of Korra: Turf Wars – Part One” reveal of Kya II as sapphic.


# Date density for each of eight relationship tags, one layer per tag.
# Every layer keeps the rows of `relationshipsStats` whose relationshipN
# column is > 0 and is drawn in the N-th colour of `custompalette`.
# NOTE(review): `relationshipsStats` and `plotLegendRelationships` are not
# created anywhere in the visible part of this file -- presumably built in a
# chunk that is not shown; confirm before running this section on its own.
plotRelationships <- ggplot() +
    geom_density(data = relationshipsStats[relationshipsStats$relationship1 > 0,], mapping=aes(x = Date), colour=custompalette[1])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship2 > 0,], mapping=aes(x = Date), colour=custompalette[2])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship3 > 0,], mapping=aes(x = Date), colour=custompalette[3])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship4 > 0,], mapping=aes(x = Date), colour=custompalette[4])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship5 > 0,], mapping=aes(x = Date), colour=custompalette[5])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship6 > 0,], mapping=aes(x = Date), colour=custompalette[6])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship7 > 0,], mapping=aes(x = Date), colour=custompalette[7])+
    geom_density(data = relationshipsStats[relationshipsStats$relationship8 > 0,], mapping=aes(x = Date), colour=custompalette[8])+
    # season start (solid) and end (dashed) dates
    geom_vline(xintercept=seasonsStart)+
    geom_vline(xintercept=seasonsEnd, linetype ="longdash")+
    scale_x_date(date_breaks="6 months")+
    # colours above are set as constants outside aes(), so no layer here maps
    # a colour aesthetic for this scale to act on
    scale_color_manual(values = custompalette) +
    theme_half_open() +
    background_grid() +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

# The legend is taken from a separate plot object and placed next to the
# density plot.
mylegend <- get_legend(plotLegendRelationships)

plot_grid(plotRelationships, mylegend,
          rel_widths = c(2,1), nrow=1)

#plotRelationships

#rm(seasons, plotDatesRatingDensity)

Archive Warnings

The majority of works are tagged with “No Archive Warnings Apply”, followed by a sizable fraction of “Creator Chose Not To Use Archive Warnings”. These two warnings are commonly confused with each other, so it’s possible that a lot of the “Creator Chose Not To Use Archive Warnings” works are mistagged “No Archive Warnings Apply” works.


# Archive warning labels, in the order they are shown on the x axis.
warningLabels <- c("No Archive Warnings Apply",
                   "Graphic Depictions Of Violence",
                   "Major Character Death",
                   "Rape/Non-Con",
                   "Underage",
                   "Creator Chose Not To Use Archive Warnings")

# Number of works whose warnings field matches each label; a work carrying
# several warnings is counted once under each of them.
warningCounts <- vapply(warningLabels,
                        function(label) sum(grepl(label, warnings)),
                        integer(1),
                        USE.NAMES = FALSE)

multipleWarningSummary <- data.frame(Warning = warningLabels,
                                     Number.of.Works = warningCounts)
multipleWarningSummary$Warning <- factor(multipleWarningSummary$Warning,
                                         levels = warningLabels)

plotWarnings <- plot_col(multipleWarningSummary, 'Warning', 'Number.of.Works', 'right')
plotWarnings


rm(warningLabels, warningCounts, multipleWarningSummary, plotWarnings)

Multiple Fandoms

Number of works tagged with more than 1 fandom is 1901, but number of works tagged with more than 2 fandoms is 499 which seems to be due to works often being tagged with both “Avatar: Legend of Korra” and “Avatar: The Last Airbender”.

Number of works explicitly tagged as ‘crossover’ is just 246.

Authors by Works

Top 30 of most prolific authors in the tag by the number of stories as of data collection date:

topList <- 30

# Works per author, most prolific first. The counts are sorted explicitly:
# summary() on a factor only orders by frequency when there are more levels
# than its `maxsum` limit. head() also avoids NA rows when fewer than
# `topList` authors exist.
authorCounts <- sort(table(as.character(unlist(author))), decreasing = TRUE)
authorTop <- head(authorCounts, topList)

AuthorTable <- data.frame('Author' = names(authorTop),
                          'Number of Stories' = as.vector(authorTop))
rm(authorCounts, authorTop)

kable(AuthorTable,
      col.names = c('Author', 'Number of Stories'))

Author Number of Stories
orphan_account 213
allywonderland 114
spockandawe 99
Writerleft 94
TurboNerdQueen 88
ObjectiveMistress 71
YennaWang 64
RaeDMagdon 55
Vampiric_Charms 55
iviscrit 54
Savorysavery 54
Cybercitizen 52
ItsaVikingThing 49
notgeorgelucas 49
Lion01 48
DraceDomino 44
ragnarok89 44
LarirenShadow 42
Nightworldlove 42
Angel_of_the_Starz 40
braigwen_s 40
Swani 39
slacktension 35
LissaBear 34
LizBee 34
AvatarAang7 33
gillywulf 33
TheWillowTree 33
kittymannequin 32
Carliro 29


rm(AuthorTable)

The top place is occupied by orphan_account, which is an artifact of the archive’s work-orphaning function.

Authors by Words

Only 137 works have more than one author. In cases where a work had more than one author, I assumed that each of them contributed an equal number of words.

Top 30 of most prolific authors in the tag by the number of words written as of data collection date:


# Words credited to every (work, author) pair, in the same order in which
# unlist(author) enumerates the authors. A work's word count is split equally
# between its authors.
# The divisor used to be length(author[[5]]), i.e. the author count of the
# fifth work, instead of the author count of the current work.
wordsByAuthor <- vector("list", length(words))

for (i in seq_along(words)){
  # rep(x, 1) is just x, so single-author works keep their full word count;
  # a work with no author contributes nothing, matching unlist(author)
  wordsByAuthor[[i]] <- rep(words[[i]]/length(author[[i]]), length(author[[i]]))
}
wordsByAuthor <- unlist(wordsByAuthor)

AuthorWordsTable <- data.frame('Author' = as.factor(unlist(author)),
                               'Words' = wordsByAuthor)

# Total words per author, largest first.
AuthorWordsSummary <- ddply(AuthorWordsTable, .(Author),
                            summarize,
                            Total.Words = sum(Words))
AuthorWordsSummary <- AuthorWordsSummary[order(AuthorWordsSummary$Total.Words, decreasing = TRUE),]
row.names(AuthorWordsSummary) <- NULL

topList <- 30

# head() avoids NA rows when there are fewer than `topList` authors.
kable(head(AuthorWordsSummary, topList),
      col.names = c('Author', 'Total Words'))

Author Total Words
OurImpavidHeroine 1423345
Raven_Hallowryn 1230634
orphan_account 1152506
laurrayn 942054
WestOrEast 782403
RaeDMagdon 742536
duvarneya 705828
99nzhe 703828
sanctum_c 688005
Puffie 686658
YennaWang 672519
DimensionalLover 661214
kittymannequin 642239
AvatarAang7 636877
Apnsb 623536
Lion01 598358
Revans_Mask 564095
commandmetobewell 556755
MrMander 553751
DraceDomino 542456
SkyLynnx 531015
Destiny_Smasher 530580
Cuofeng 518607
AutyRose 507216
Writerleft 503703
chaisan 494807
Angel_of_the_Starz 485085
chelonianmobile 484384
MultiFanGirlWickedPony 484384
Writearoundchic 484384


rm(wordsByAuthor, i, AuthorWordsTable, AuthorWordsSummary)

Interestingly, orphan_account made it to the top by the number of words written as well.

Characters

Top 30 of the most popular characters:

topList <- 30

# Works per character tag, most frequent first. The counts are sorted
# explicitly: summary() on a factor only orders by frequency when there are
# more levels than its `maxsum` limit. head() also avoids NA rows when fewer
# than `topList` tags exist.
characterCounts <- sort(table(as.character(unlist(character))), decreasing = TRUE)
characterTop <- head(characterCounts, topList)

CharacterTable <- data.frame('Character' = names(characterTop),
                             'Number of Stories' = as.vector(characterTop))
rm(characterCounts, characterTop)

kable(CharacterTable,
      col.names = c('Character', 'Number of Stories'))

Character Number of Stories
Korra (Avatar) 5736
Asami Sato 4973
Mako (Avatar) 2544
Bolin (Avatar) 2302
Tenzin (Avatar) 1274
Lin Beifong 1206
Opal (Avatar) 942
Kuvira (Avatar) 904
Jinora (Avatar) 733
Katara (Avatar) 698
Kya II (Avatar) 648
Korra 624
Suyin Beifong 593
Pema (Avatar) 494
Aang (Avatar) 465
Zuko (Avatar) 458
Tonraq (Avatar) 409
Lin Bei Fong 408
Hiroshi Sato 393
Toph Beifong 393
Ikki (Avatar) 371
Senna (Avatar) 363
Original Characters 351
Amon (Avatar) 343
Sokka (Avatar) 329
Iroh II (Avatar) 322
Baatar Jr. (Avatar) 321
Bumi II (Avatar) 317
Varrick (Avatar) 305
Meelo (Avatar) 304


rm(CharacterTable)

Relationships

Top 30 of the most popular relationships:

I don’t have access to Ao3’s system of synonymous tags, so by virtue of text processing some relationship tags here are repeated.

Overwhelmingly, “Korra/Asami Sato”/“Korrasami”/“Korra/Asami” is the most popular relationship in LOK, contributing to popularity of “F/F” category. They are followed by “Korra/Mako (Avatar)”, and “Bolin/Opal (Avatar)”.

topList <- 30

# Works per relationship tag, most frequent first. The counts are sorted
# explicitly: summary() on a factor only orders by frequency when there are
# more levels than its `maxsum` limit. head() also avoids NA rows when fewer
# than `topList` tags exist.
relationshipCounts <- sort(table(as.character(unlist(relationships))), decreasing = TRUE)
relationshipTop <- head(relationshipCounts, topList)

RelationshipsTable <- data.frame('Relationship' = names(relationshipTop),
                                 'Number of Stories' = as.vector(relationshipTop))
rm(relationshipCounts, relationshipTop)

kable(RelationshipsTable,
      col.names = c('Relationship', 'Number of Stories'))

Relationship Number of Stories
Korra/Asami Sato 4393
Korra/Mako (Avatar) 606
Bolin/Opal (Avatar) 486
Lin Beifong/Kya II 311
Baatar Jr./Kuvira (Avatar) 222
Aang/Katara (Avatar) 209
Korrasami 185
Jinora/Kai (Avatar) 174
Pema/Tenzin (Avatar) 168
Mako/Asami Sato 156
Lin Beifong/Tenzin 145
Mako/Prince Wu (Avatar) 142
Korra & Asami Sato 139
Bolin/Korra (Avatar) 138
Amon/Lieutenant (Avatar) 96
Korra/Kuvira (Avatar) 91
Mai/Zuko (Avatar) 86
Katara/Zuko (Avatar) 82
Amon/Korra (Avatar) 81
Varrick/Zhu Li Moon 81
Korra/Kuvira 75
Senna/Tonraq (Avatar) 75
Korra/Tahno (Avatar) 73
Bolin & Mako (Avatar) 70
Toph Beifong/Sokka 65
Lin Bei Fong/Tenzin 62
Sokka/Suki (Avatar) 61
Suyin Beifong/Kuvira 55
Baatar Sr./Suyin Beifong 53
Korra/Asami 52


rm(RelationshipsTable)

Freeform tags

Top 30 of the most popular freeform tags:

topList <- 30

# Works per freeform tag, most frequent first. The counts are sorted
# explicitly: summary() on a factor only orders by frequency when there are
# more levels than its `maxsum` limit. head() also avoids NA rows when fewer
# than `topList` tags exist.
freeformCounts <- sort(table(as.character(unlist(freeform))), decreasing = TRUE)
freeformTop <- head(freeformCounts, topList)

FreeformTable <- data.frame('Freeform' = names(freeformTop),
                            'Number of Stories' = as.vector(freeformTop))
rm(freeformCounts, freeformTop)

kable(FreeformTable,
      col.names = c('Freeform Tag', 'Number of Stories'))

Freeform Tag Number of Stories
Fluff 1229
Romance 803
Angst 703
Alternate Universe - Modern Setting 610
Korrasami - Freeform 430
Alternate Universe 389
Smut 389
Friendship 341
Canon Compliant 329
Hurt/Comfort 329
Humor 307
Alternate Universe - Canon Divergence 289
Family 288
Post-Canon 242
Established Relationship 226
One Shot 219
Oral Sex 194
Drama 191
Crossover 179
Slow Burn 178
Drabble 177
Fluff and Angst 173
Alternate Universe - College/University 170
AU 160
Friends to Lovers 153
Alpha/Beta/Omega Dynamics 151
Femslash 150
Love 148
Cross-Posted on FanFiction.Net 137
Emotional Hurt/Comfort 133


rm(FreeformTable)

Languages

Unsurprisingly, most works are written in English. Apologies for U+. kable package for whatever reason murders unicode characters. The two languages in question are Russian (Русский) and Chinese (中文).

#topList <- 30

# Works per language; `languagesList` itself stays in level order.
languagesList <- summary(as.factor(unlist(language)))

# Row order for the table: most common language first.
languageRanking <- order(languagesList, decreasing = TRUE)

LanguageTable <- data.frame('Language' = names(languagesList)[languageRanking],
                            'Number of Stories' = languagesList[languageRanking])
row.names(LanguageTable) <- NULL
rm(languageRanking)

kable(LanguageTable,
      col.names = c('Language', 'Number of Stories'))

Language Number of Stories
English 9445
Español 74
Français 71
<U+0420><U+0443><U+0441><U+0441><U+043A><U+0438><U+0439> 17
<U+4E2D><U+6587> 8
Português brasileiro 7
Italiano 6
Deutsch 5
Polski 1


#languagesList

#rm(LanguageTable)
