Hello. In this web-scraping project in R, I extract recent cryptocurrency price data from Coingecko.
The setup consists of loading four libraries: dplyr, tidyr, rvest and stringr. (The readr package must also be installed, because readr::parse_integer() is called when the rank column is parsed.)
# Coingecko Web Scrape In R
# Reference: https://stackoverflow.com/questions/45450981/rvest-scrape-2-classes-in-1-tag
# https://www.statology.org/remove-dollar-sign-in-r/
# Load libraries:
library(dplyr)
library(tidyr)
library(rvest)
library(stringr)
From the rvest package, the main function needed is read_html(). Once the HTML page is loaded, extract the tbody part of the page.
## Extract top 100 cryptos on Coingecko.com
# Download and parse the landing page, then keep only the first <tbody>
# node; every column below is scraped from inside that node.
page <- read_html("https://www.coingecko.com/")
table <- html_element(page, "tbody")
Rank
# Get Rank: take the text of every rank cell and parse it as an integer.
# The selector is an exact match on the cell's full class attribute.
rank <- table %>%
  html_elements(
    "[class='table-number tw-text-left text-xs cg-sticky-col cg-sticky-second-col tw-max-w-14 lg:tw-w-14']"
  ) %>%
  html_text2() %>%
  readr::parse_integer()
# Show top 30 (R is 1-indexed, so use head() instead of a 0-based range)
head(rank, 30)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30
Crypto Name
# Name: full coin name from each row, matched on the exact class attribute.
crypto_name <- table %>%
  html_elements("[class='tw-hidden lg:tw-flex font-bold tw-items-center tw-justify-between']") %>%
  html_text2()
# Show the first 30 names (head() avoids the 0-based range in 1-indexed R)
head(crypto_name, 30)
## [1] "Bitcoin" "Ethereum" "Tether"
## [4] "BNB" "USD Coin" "XRP"
## [7] "Solana" "Cardano" "Terra"
## [10] "Avalanche" "Polkadot" "Dogecoin"
## [13] "Binance USD" "TerraUSD" "Shiba Inu"
## [16] "Wrapped Bitcoin" "NEAR Protocol" "Cronos"
## [19] "Lido Staked Ether" "Polygon" "Dai"
## [22] "Litecoin" "Cosmos Hub" "Chainlink"
## [25] "Bitcoin Cash" "TRON" "FTX Token"
## [28] "LEO Token" "OKB" "Stellar"
Ticker Symbol
# Ticker: symbol text from each row, matched on the exact class attribute.
ticker <- table %>%
  html_elements("[class='d-lg-none font-bold tw-w-12']") %>%
  html_text2()
# Show the first 30 tickers (head() avoids the 0-based range in 1-indexed R)
head(ticker, 30)
## [1] "BTC" "ETH" "USDT" "BNB" "USDC" "XRP" "SOL" "ADA" "LUNA"
## [10] "AVAX" "DOT" "DOGE" "BUSD" "UST" "SHIB" "WBTC" "NEAR" "CRO"
## [19] "STETH" "MATIC" "DAI" "LTC" "ATOM" "LINK" "BCH" "TRX" "FTT"
## [28] "LEO" "OKB" "XLM"
Prices
# Extract price: the cell text is kept as a character string
# (currency symbol and thousands separators included).
price <- table %>%
  html_elements('[class="td-price price text-right pl-0"]') %>%
  html_text2()
# Show the first 30 prices (head() avoids the 0-based range in 1-indexed R)
head(price, 30)
## [1] "$40,505.71" "$3,045.67" "$1.00" "$416.17"
## [5] "$0.999188" "$0.781021" "$102.93" "$0.956710"
## [9] "$81.51" "$77.99" "$18.50" "$0.144472"
## [13] "$0.999752" "$1.00" "$0.000025879106" "$40,501.94"
## [17] "$16.05" "$0.411197" "$3,044.92" "$1.39"
## [21] "$1.00" "$113.36" "$24.30" "$14.17"
## [25] "$344.79" "$0.062043044551" "$42.72" "$5.83"
## [29] "$20.66" "$0.207382"
One Hour Percentage Changes
# 1 Hour Change: percentage text from each row, kept as character.
change_1h <- table %>%
  html_elements("[class='td-change1h change1h stat-percent text-right col-market']") %>%
  html_text2()
# Show the first 30 values (head() avoids the 0-based range in 1-indexed R)
head(change_1h, 30)
## [1] "0.1%" "0.1%" "-0.0%" "0.2%" "-0.1%" "0.2%" "0.6%" "0.2%" "0.2%"
## [10] "0.5%" "0.1%" "0.2%" "-0.0%" "-0.0%" "0.4%" "0.1%" "0.2%" "0.1%"
## [19] "0.2%" "-0.4%" "-0.1%" "0.5%" "-0.1%" "0.1%" "0.1%" "0.0%" "0.1%"
## [28] "0.0%" "-0.3%" "0.3%"
Twenty Four Hour Percentage Changes
# 24 Hour Change: percentage text from each row, kept as character.
change_24h <- table %>%
  html_elements("[class='td-change24h change24h stat-percent text-right col-market']") %>%
  html_text2()
# Show the first 30 values (head() avoids the 0-based range in 1-indexed R)
head(change_24h, 30)
## [1] "-0.5%" "-0.1%" "-1.2%" "-1.0%" "-1.2%" "0.3%" "1.2%" "0.0%" "0.4%"
## [10] "2.6%" "1.5%" "-2.2%" "-1.2%" "-1.3%" "-0.7%" "-0.3%" "0.7%" "0.1%"
## [19] "0.4%" "0.1%" "-1.1%" "1.8%" "0.2%" "1.7%" "1.0%" "0.2%" "-0.4%"
## [28] "-0.5%" "2.4%" "1.3%"
Seven Day Percentage Changes
# 7 Day Change: percentage text from each row, kept as character.
change_7d <- table %>%
  html_elements("[class='td-change7d change7d stat-percent text-right col-market']") %>%
  html_text2()
# Show the first 30 values (head() avoids the 0-based range in 1-indexed R)
head(change_7d, 30)
## [1] "-4.3%" "-4.6%" "-0.0%" "-1.3%" "0.1%" "3.5%" "-6.2%" "-6.9%"
## [9] "-13.6%" "-6.8%" "-4.5%" "1.4%" "0.0%" "0.1%" "9.0%" "-4.2%"
## [17] "-8.6%" "-5.8%" "-4.5%" "-2.9%" "-0.2%" "2.3%" "-9.5%" "-6.4%"
## [25] "6.7%" "-0.9%" "-4.5%" "-0.8%" "7.2%" "2.7%"
24 Hour Volume
# 24 Hour Volume: cell text from each row, kept as character.
# NOTE(review): the matched cell class is named "td-liquidity_score";
# confirm against the live page that it still holds the 24h volume.
volume_24h <- table %>%
  html_elements("[class='td-liquidity_score lit text-right col-market']") %>%
  html_text2()
# Show the first 30 values (head() avoids the 0-based range in 1-indexed R)
head(volume_24h, 30)
## [1] "$17,141,078,695" "$9,325,984,551" "$32,582,688,503" "$965,638,462"
## [5] "$3,811,066,232" "$2,852,868,049" "$821,577,710" "$433,333,442"
## [9] "$913,540,057" "$289,038,797" "$332,613,399" "$719,948,258"
## [13] "$2,167,177,182" "$351,138,325" "$340,763,112" "$129,239,837"
## [17] "$483,480,810" "$37,843,945" "$82,009,930" "$272,650,587"
## [21] "$137,385,215" "$533,073,249" "$277,770,198" "$313,656,958"
## [25] "$273,934,238" "$893,091,206" "$97,892,343" "$1,759,785"
## [29] "$63,039,831" "$196,606,123"
Market Cap
# Market Cap: cell text from each row, kept as character.
market_cap <- table %>%
  html_elements("[class='td-market_cap cap col-market cap-price text-right']") %>%
  html_text2()
# Show the first 30 values (head() avoids the 0-based range in 1-indexed R)
head(market_cap, 30)
## [1] "$770,303,462,673" "$366,630,945,738" "$82,792,742,856" "$69,936,419,454"
## [5] "$50,058,856,536" "$37,569,475,140" "$34,298,774,589" "$30,635,118,620"
## [9] "$29,239,478,367" "$20,918,338,722" "$20,388,687,382" "$19,165,614,641"
## [13] "$17,448,631,823" "$17,343,901,926" "$14,210,233,203" "$11,265,480,083"
## [17] "$10,804,641,768" "$10,389,876,302" "$10,278,286,691" "$9,578,568,019"
## [21] "$8,794,606,672" "$7,942,675,830" "$7,104,889,918" "$6,616,560,903"
## [25] "$6,513,485,570" "$6,308,052,481" "$5,855,713,135" "$5,457,600,547"
## [29] "$5,373,766,209" "$5,127,308,929"
Once the individual table columns have been obtained, the data frame can be assembled with data.frame().
# Put together as dataframe:
# Gather the scraped vectors under their output column names.
cg_columns <- list(
  Rank = rank,
  Crypto = crypto_name,
  Ticker = ticker,
  Price = price,
  Change_1h = change_1h,
  Change_24h = change_24h,
  Change_7d = change_7d,
  Volume_24h = volume_24h,
  Market_Cap = market_cap
)
# data.frame() silently recycles a vector whose length divides the longest
# one, which would misalign rows if a selector matched fewer nodes than the
# others. Stop with an error instead of building a corrupted table.
stopifnot(length(unique(lengths(cg_columns))) == 1L)
cg_data <- do.call(data.frame, cg_columns)
# Preview dataframe
head(cg_data, 10)
## Rank Crypto Ticker Price Change_1h Change_24h Change_7d
## 1 1 Bitcoin BTC $40,505.71 0.1% -0.5% -4.3%
## 2 2 Ethereum ETH $3,045.67 0.1% -0.1% -4.6%
## 3 3 Tether USDT $1.00 -0.0% -1.2% -0.0%
## 4 4 BNB BNB $416.17 0.2% -1.0% -1.3%
## 5 5 USD Coin USDC $0.999188 -0.1% -1.2% 0.1%
## 6 6 XRP XRP $0.781021 0.2% 0.3% 3.5%
## 7 7 Solana SOL $102.93 0.6% 1.2% -6.2%
## 8 8 Cardano ADA $0.956710 0.2% 0.0% -6.9%
## 9 9 Terra LUNA $81.51 0.2% 0.4% -13.6%
## 10 10 Avalanche AVAX $77.99 0.5% 2.6% -6.8%
## Volume_24h Market_Cap
## 1 $17,141,078,695 $770,303,462,673
## 2 $9,325,984,551 $366,630,945,738
## 3 $32,582,688,503 $82,792,742,856
## 4 $965,638,462 $69,936,419,454
## 5 $3,811,066,232 $50,058,856,536
## 6 $2,852,868,049 $37,569,475,140
## 7 $821,577,710 $34,298,774,589
## 8 $433,333,442 $30,635,118,620
## 9 $913,540,057 $29,239,478,367
## 10 $289,038,797 $20,918,338,722
# Preview the last ten rows of the assembled data frame
tail(cg_data, n = 10)
## Rank Crypto Ticker Price Change_1h Change_24h
## 91 91 Stacks STX $1.18 -0.1% -1.7%
## 92 92 Synthetix Network Token SNX $5.28 0.2% -6.0%
## 93 93 Dash DASH $108.02 0.6% -1.5%
## 94 94 Loopring LRC $0.902922 1.0% 1.7%
## 95 95 Basic Attention Token BAT $0.750673 -0.3% 3.6%
## 96 96 BitDAO BIT $1.22 -0.1% 0.9%
## 97 97 Mina Protocol MINA $2.43 1.2% 5.1%
## 98 98 Amp AMP $0.022802951520 -0.1% 0.5%
## 99 99 Pax Dollar USDP $0.999416 -0.3% -1.4%
## 100 100 GateToken GT $6.46 0.1% -3.1%
## Change_7d Volume_24h Market_Cap
## 91 -5.4% $8,466,377 $1,240,403,021
## 92 1.1% $63,421,108 $1,153,124,426
## 93 -2.6% $159,006,163 $1,152,774,284
## 94 -6.2% $65,595,451 $1,125,649,097
## 95 1.2% $107,388,722 $1,125,379,206
## 96 -0.5% $37,169,365 $1,122,926,453
## 97 -24.3% $48,122,248 $1,120,956,031
## 98 -7.0% $6,409,967 $1,094,167,291
## 99 -0.1% $469,353,790 $1,005,544,802
## 100 -2.6% $5,712,497 $980,947,557
# Inspect the structure of the data frame: one line per column with its type
str(object = cg_data)
## 'data.frame': 100 obs. of 9 variables:
## $ Rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Crypto : chr "Bitcoin" "Ethereum" "Tether" "BNB" ...
## $ Ticker : chr "BTC" "ETH" "USDT" "BNB" ...
## $ Price : chr "$40,505.71" "$3,045.67" "$1.00" "$416.17" ...
## $ Change_1h : chr "0.1%" "0.1%" "-0.0%" "0.2%" ...
## $ Change_24h: chr "-0.5%" "-0.1%" "-1.2%" "-1.0%" ...
## $ Change_7d : chr "-4.3%" "-4.6%" "-0.0%" "-1.3%" ...
## $ Volume_24h: chr "$17,141,078,695" "$9,325,984,551" "$32,582,688,503" "$965,638,462" ...
## $ Market_Cap: chr "$770,303,462,673" "$366,630,945,738" "$82,792,742,856" "$69,936,419,454" ...
Optionally, you can save the top-100 crypto prices data frame to a .csv file with write.csv().
# Date on which the script is run (the same call stamps the CSV file name below)
Sys.Date()
## [1] "2022-04-16"
# Save into .csv file:
# The file name embeds the current date, e.g. Coingecko_Prices_<YYYY-MM-DD>.csv;
# row.names = FALSE keeps R's row index out of the file.
write.csv(
  cg_data,
  file = paste0("Coingecko_Prices_", Sys.Date(), ".csv"),
  row.names = FALSE
)