In this project, I showcase how to webscrape the top 100 DJs of 2021 according to DJ Magazine.
Setup
Load the packages rvest, dplyr, tidyr and stringr. The page contents can then be loaded with read_html().
# DJ Magazine Top 100 2021 Webscraping
# Load libraries:
library(rvest)
library(dplyr)
library(tidyr)
library(stringr)
# Address of the DJ Mag Top 100 DJs page to scrape:
url <- "https://djmag.com/top100djs/"
# Download and parse the page once; the extraction steps all query this object:
dj_mag2021_page <- url %>%
  read_html()
From the rvest package, I use html_nodes() together with the class names found by clicking Inspect on the page. This was one of my earlier webscraping projects, so I do not use XPath.
I found that there was no class name on the page from which to extract the rank. Since the DJs are listed in rank order, it was simpler to generate the Rank column as a sequence from 1 to 100.
Artist/DJ Name
# DJ/Artist Name:
# Rank is not scraped; it is generated when the data frame is assembled.
# Select every element whose class attribute is exactly 'top100dj-name' and
# keep its rendered text.
names <- html_text2(
  html_elements(dj_mag2021_page, "[class='top100dj-name']")
)
Rank Movement
# Movement:
# Rendered text of every element whose class attribute is exactly
# 'top100dj-movement' (labels such as "Up 1", "Down 3", "Non Mover").
rank_movement <- html_text2(
  html_elements(dj_mag2021_page, "[class='top100dj-movement']")
)
DJ Magazine 2021 Interview Links
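The interview links are taken from the href attribute of the link inside each DJ name element and then prefixed with the site domain.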
# 2021 DJMag Interview URL Links:
# Each DJ name element wraps an <a> whose href is a site-relative path
# (e.g. "/top-100-djs/poll-2021-david-guetta"), so it has to be joined to the
# domain to form a usable link.
interview_paths <- dj_mag2021_page %>%
  html_elements("[class='top100dj-name']") %>%
  html_element("a") %>%
  html_attr("href")
# Start from the raw hrefs so an entry without a link stays NA instead of
# becoming the literal string "https://djmag.com/NA".
dj_interview_urls <- interview_paths
has_link <- !is.na(interview_paths)
# Drop the leading slash(es) before joining; pasting the path straight onto
# "https://djmag.com/" produced "https://djmag.com//top-100-djs/...".
dj_interview_urls[has_link] <- paste0(
  "https://djmag.com/",
  sub("^/+", "", interview_paths[has_link])
)
With the extracted parts, the dataframe can be made.
# Create Dataframe:
# data.frame() silently recycles a shorter column when its length divides the
# longest one, which would misalign DJs, movements and links. Confirm the
# three scraped vectors line up before combining them.
stopifnot(
  length(names) > 0,
  length(rank_movement) == length(names),
  length(dj_interview_urls) == length(names)
)
# Rank is each entry's position, which relies on the page listing the DJs in
# rank order; deriving it from the scraped length avoids hard-coding 100.
dj_mag_2021_df <- data.frame(
  Rank = seq_along(names),
  DJ = names,
  Rank_Change = rank_movement,
  Interview_URL = dj_interview_urls
)
# Preview the first ten rows.
head(dj_mag_2021_df, 10)
## Rank DJ Rank_Change
## 1 1 David Guetta Non Mover
## 2 2 Martin Garrix Up 1
## 3 3 Armin Van Buuren Up 1
## 4 4 Alok Up 1
## 5 5 Dimitri Vegas & Like Mike Down 3
## 6 6 Afrojack Up 1
## 7 7 Don Diablo Down 1
## 8 8 Oliver Heldens Stay 8
## 9 9 Timmy Trumpet Up 1
## 10 10 Steve Aoki Down 1
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-david-guetta
## 2 https://djmag.com//top-100-djs/poll-2021-martin-garrix
## 3 https://djmag.com//top-100-djs/poll-2021-armin-van-buuren
## 4 https://djmag.com//top-100-djs/poll-2021-alok
## 5 https://djmag.com//top-100-djs/poll-2021-dimitri-vegas-mike
## 6 https://djmag.com//top-100-djs/poll-2021-afrojack
## 7 https://djmag.com//top-100-djs/poll-2021-don-diablo
## 8 https://djmag.com//top-100-djs/poll-2021-oliver-heldens
## 9 https://djmag.com//top-100-djs/poll-2021-timmy-trumpet
## 10 https://djmag.com//top-100-djs/poll-2021-steve-aoki
New Entries
## New entries: rows whose movement label is exactly "New Entry".
new_entries <- filter(dj_mag_2021_df, Rank_Change == "New Entry")
new_entries
## Rank DJ Rank_Change
## 1 39 MORTEN New Entry
## 2 55 Beauz New Entry
## 3 69 Panta Q New Entry
## 4 72 Naeleck New Entry
## 5 78 Virtual Riot New Entry
## 6 83 KAAZE New Entry
## 7 90 Giftback New Entry
## 8 91 Christina Novelli New Entry
## 9 93 KAKA New Entry
## 10 96 Luminn New Entry
## 11 97 Alexso New Entry
## 12 99 Dexter King New Entry
## 13 100 VIZE New Entry
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-morten
## 2 https://djmag.com//top-100-djs/poll-2021-beauz
## 3 https://djmag.com//top-100-djs/poll-2021-panta-q
## 4 https://djmag.com//top-100-djs/poll-2021-naeleck
## 5 https://djmag.com//top-100-djs/poll-2021-virtual-riot
## 6 https://djmag.com//top-100-djs/poll-2021-kaaze
## 7 https://djmag.com//top-100-djs/poll-2021-giftback
## 8 https://djmag.com//top-100-djs/poll-2021-christina-novelli
## 9 https://djmag.com//top-100-djs/poll-2021-kaka
## 10 https://djmag.com//top-100-djs/poll-2021-luminn
## 11 https://djmag.com//top-100-djs/poll-2021-alexso
## 12 https://djmag.com//top-100-djs/poll-2021-dexter-king
## 13 https://djmag.com//top-100-djs/poll-2021-vize
## Re-entries: DJs who had dropped out of the top 100 and returned in 2021
## (movement label is exactly "Re-entry").
re_entries <- filter(dj_mag_2021_df, Rank_Change == "Re-entry")
re_entries
## Rank DJ Rank_Change
## 1 37 Carnage/GORDO Re-entry
## 2 88 MaRLo Re-entry
## 3 98 Daddy’s Groove Re-entry
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-carnagegordo
## 2 https://djmag.com//top-100-djs/poll-2021-marlo
## 3 https://djmag.com//top-100-djs/poll-2021-daddy-s-groove
## Climbers: rows whose movement label contains "Up" (e.g. "Up 1", "Up 13").
up_movers <- filter(
  dj_mag_2021_df,
  grepl(pattern = "Up", x = Rank_Change)
)
head(up_movers, 10)
## Rank DJ Rank_Change
## 1 2 Martin Garrix Up 1
## 2 3 Armin Van Buuren Up 1
## 3 4 Alok Up 1
## 4 6 Afrojack Up 1
## 5 9 Timmy Trumpet Up 1
## 6 11 KSHMR Up 1
## 7 12 R3hab Up 1
## 8 15 Tiësto Up 1
## 9 16 Calvin Harris Up 2
## 10 17 Vintage Culture Up 13
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-martin-garrix
## 2 https://djmag.com//top-100-djs/poll-2021-armin-van-buuren
## 3 https://djmag.com//top-100-djs/poll-2021-alok
## 4 https://djmag.com//top-100-djs/poll-2021-afrojack
## 5 https://djmag.com//top-100-djs/poll-2021-timmy-trumpet
## 6 https://djmag.com//top-100-djs/poll-2021-kshmr
## 7 https://djmag.com//top-100-djs/poll-2021-r3hab
## 8 https://djmag.com//top-100-djs/poll-2021-ti-sto
## 9 https://djmag.com//top-100-djs/poll-2021-calvin-harris
## 10 https://djmag.com//top-100-djs/poll-2021-vintage-culture
## Fallers: rows whose movement label contains "Down" (e.g. "Down 3").
down_movers <- filter(
  dj_mag_2021_df,
  grepl(pattern = "Down", x = Rank_Change)
)
head(down_movers, 10)
## Rank DJ Rank_Change
## 1 5 Dimitri Vegas & Like Mike Down 3
## 2 7 Don Diablo Down 1
## 3 10 Steve Aoki Down 1
## 4 13 Marshmello Down 2
## 5 21 Above & Beyond Down 2
## 6 24 Skrillex Down 9
## 7 25 Lost Frequencies Down 4
## 8 29 The Chainsmokers Down 2
## 9 31 Eric Prydz Down 9
## 10 33 DJ Snake Down 8
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-dimitri-vegas-mike
## 2 https://djmag.com//top-100-djs/poll-2021-don-diablo
## 3 https://djmag.com//top-100-djs/poll-2021-steve-aoki
## 4 https://djmag.com//top-100-djs/poll-2021-marshmello
## 5 https://djmag.com//top-100-djs/poll-2021-above-beyond
## 6 https://djmag.com//top-100-djs/poll-2021-skrillex
## 7 https://djmag.com//top-100-djs/poll-2021-lost-frequencies
## 8 https://djmag.com//top-100-djs/poll-2021-chainsmokers
## 9 https://djmag.com//top-100-djs/poll-2021-eric-prydz
## 10 https://djmag.com//top-100-djs/poll-2021-dj-snake
## Non-movers: rows whose movement label is exactly "Non Mover".
non_movers <- filter(dj_mag_2021_df, Rank_Change == "Non Mover")
non_movers
## Rank DJ Rank_Change
## 1 1 David Guetta Non Mover
## 2 14 W&W Non Mover
## 3 74 Kura Non Mover
## 4 87 Sunnery James & Ryan Marciano Non Mover
## Interview_URL
## 1 https://djmag.com//top-100-djs/poll-2021-david-guetta
## 2 https://djmag.com//top-100-djs/poll-2021-ww
## 3 https://djmag.com//top-100-djs/poll-2021-kura
## 4 https://djmag.com//top-100-djs/poll-2021-sunnery-james-ryan-marciano