The table below provides relative frequency estimates (as z-scores) for each descriptor when combined with the noun at the top of each column (person, woman, man, girl, boy). The index column is the average across ngrams for each descriptor, leaving out the maximum value. For more information about the calculation of this index, the sources of this (over-inclusive) list of terms and data, or additional details, see the manuscript listed below.
Condon, D. M., McDougald, S., & Altgassen, E. (under review). Frequency of use metrics for person descriptors: Extensions of Roivainen’s internet search methodology. PsyArXiv. https://doi.org/10.31234/osf.io/9gtj7
library(here) # for engaging with working environment
library(Hmisc) # for weighted means
library(DT) # for viewing data tables
library(tidyverse) # for data cleaning and manipulation
library(dataverse)
library(data.table)
# Load the frequency-index table and round each score column to 4 decimals.
serps <- read.csv(here("data/serps/serps_freq_index.csv"))
score_cols <- c(
  "person_zscore", "woman_zscore", "man_zscore",
  "girl_zscore", "boy_zscore", "freq_index"
)
for (score_col in score_cols) {
  serps[[score_col]] <- round(serps[[score_col]], 4)
}
# Character fixes for the TDA column (currently disabled):
#serps$TDA <- sub("blas\x8e", "blasé", serps$TDA)
#serps$TDA <- sub("FALSE", "false", serps$TDA)
# Render `serps` as an interactive HTML table with per-column filters.
# NOTE(review): the seven display names below are applied positionally, so
# this assumes the CSV has exactly seven columns in the order descriptor,
# person, woman, man, girl, boy, index -- confirm against the data file.
serps_prop <- serps %>%
  DT::datatable(
    colnames = c(
      "Adjective", "+person", "+woman", "+man", "+girl", "+boy", "index"
    ),
    options = list(
      # Page-size choices offered in the menu; -1 is labelled "All".
      lengthMenu = list(c(10, 50, -1), c("10", "50", "All")),
      pageLength = 10
    ),
    filter = "top", # filter boxes above each column
    # Use FALSE rather than F: F is an ordinary variable that can be
    # reassigned, which would silently turn row names back on.
    rownames = FALSE
  )
# Print the widget so it is displayed in the rendered output.
serps_prop