Hello. In this web scraping project in R, I extract recent cryptocurrency price data from CoinGecko.

 

Contents

Setup

The setup consists of loading four libraries: dplyr, tidyr, rvest and stringr. The readr package must also be installed, because readr::parse_integer() is called later with its namespace prefix.

# Coingecko Web Scrape In R
# Reference: https://stackoverflow.com/questions/45450981/rvest-scrape-2-classes-in-1-tag
# https://www.statology.org/remove-dollar-sign-in-r/

# Load libraries:

library(dplyr)
library(tidyr)
library(rvest)
library(stringr)

 

Obtain Data

One of the main functions needed from the rvest package is read_html(). Once the HTML page is loaded, extract the tbody element of the page, which holds the rows of the price table.

## Extract top 100 cryptos on Coingecko.com

# Download and parse the CoinGecko front page.
page <- read_html("https://www.coingecko.com/")

# Keep only the first <tbody> element; every column below is read from it.
table <- html_element(page, "tbody")

 

Rank

# Get Rank:
# Select the rank cells by an exact match on their full class attribute, then
# convert the cell text to integers.
# NOTE(review): an exact-match class selector is tied to the page's current
# markup; it matches nothing if the class string changes.
rank <- table %>%
  html_nodes(
    "[class='table-number tw-text-left text-xs cg-sticky-col cg-sticky-second-col tw-max-w-14 lg:tw-w-14']"
  ) %>%
  html_text2() %>%
  readr::parse_integer()

# Show top 30
# head() replaces rank[0:30]: R vectors are 1-indexed (index 0 is silently
# dropped), and head() does not pad with NA when fewer than 30 values exist.
head(rank, 30)
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30

 

Crypto Name

# Name
# Text of the cells that carry the coin name, matched by an exact
# class-attribute selector.
crypto_name <- table %>%
  html_nodes("[class='tw-hidden lg:tw-flex font-bold tw-items-center tw-justify-between']") %>%
  html_text2()

# Preview the first 30 names. head() replaces crypto_name[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(crypto_name, 30)
##  [1] "Bitcoin"           "Ethereum"          "Tether"           
##  [4] "BNB"               "USD Coin"          "XRP"              
##  [7] "Solana"            "Cardano"           "Terra"            
## [10] "Avalanche"         "Polkadot"          "Dogecoin"         
## [13] "Binance USD"       "TerraUSD"          "Shiba Inu"        
## [16] "Wrapped Bitcoin"   "NEAR Protocol"     "Cronos"           
## [19] "Lido Staked Ether" "Polygon"           "Dai"              
## [22] "Litecoin"          "Cosmos Hub"        "Chainlink"        
## [25] "Bitcoin Cash"      "TRON"              "FTX Token"        
## [28] "LEO Token"         "OKB"               "Stellar"

 

Ticker Symbol

# Ticker
# Text of the cells that carry the ticker symbol, matched by an exact
# class-attribute selector.
ticker <- table %>%
  html_nodes("[class='d-lg-none font-bold tw-w-12']") %>%
  html_text2()

# Preview the first 30 tickers. head() replaces ticker[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(ticker, 30)
##  [1] "BTC"   "ETH"   "USDT"  "BNB"   "USDC"  "XRP"   "SOL"   "ADA"   "LUNA" 
## [10] "AVAX"  "DOT"   "DOGE"  "BUSD"  "UST"   "SHIB"  "WBTC"  "NEAR"  "CRO"  
## [19] "STETH" "MATIC" "DAI"   "LTC"   "ATOM"  "LINK"  "BCH"   "TRX"   "FTT"  
## [28] "LEO"   "OKB"   "XLM"

 

Prices

# Extract price:
# The text is kept as scraped (character strings such as "$1.00"); no numeric
# conversion is done here.
price <- table %>%
  html_nodes('[class="td-price price text-right pl-0"]') %>%
  html_text2()

# Preview the first 30 prices. head() replaces price[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(price, 30)
##  [1] "$40,505.71"      "$3,045.67"       "$1.00"           "$416.17"        
##  [5] "$0.999188"       "$0.781021"       "$102.93"         "$0.956710"      
##  [9] "$81.51"          "$77.99"          "$18.50"          "$0.144472"      
## [13] "$0.999752"       "$1.00"           "$0.000025879106" "$40,501.94"     
## [17] "$16.05"          "$0.411197"       "$3,044.92"       "$1.39"          
## [21] "$1.00"           "$113.36"         "$24.30"          "$14.17"         
## [25] "$344.79"         "$0.062043044551" "$42.72"          "$5.83"          
## [29] "$20.66"          "$0.207382"

 

One Hour Percentage Changes

# 1 Hour Change
# Percentage strings (e.g. "0.1%") are kept as character.
change_1h <- table %>%
  html_nodes("[class='td-change1h change1h stat-percent text-right col-market']") %>%
  html_text2()

# Preview the first 30 values. head() replaces change_1h[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(change_1h, 30)
##  [1] "0.1%"  "0.1%"  "-0.0%" "0.2%"  "-0.1%" "0.2%"  "0.6%"  "0.2%"  "0.2%" 
## [10] "0.5%"  "0.1%"  "0.2%"  "-0.0%" "-0.0%" "0.4%"  "0.1%"  "0.2%"  "0.1%" 
## [19] "0.2%"  "-0.4%" "-0.1%" "0.5%"  "-0.1%" "0.1%"  "0.1%"  "0.0%"  "0.1%" 
## [28] "0.0%"  "-0.3%" "0.3%"

 

Twenty Four Hour Percentage Changes

# 24 Hour Change
# Percentage strings (e.g. "-0.5%") are kept as character.
# html_text2() is written with parentheses, matching every other extraction
# in this script (a bare function name only works with the magrittr pipe).
change_24h <- table %>%
  html_nodes("[class='td-change24h change24h stat-percent text-right col-market']") %>%
  html_text2()

# Preview the first 30 values. head() replaces change_24h[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(change_24h, 30)
##  [1] "-0.5%" "-0.1%" "-1.2%" "-1.0%" "-1.2%" "0.3%"  "1.2%"  "0.0%"  "0.4%" 
## [10] "2.6%"  "1.5%"  "-2.2%" "-1.2%" "-1.3%" "-0.7%" "-0.3%" "0.7%"  "0.1%" 
## [19] "0.4%"  "0.1%"  "-1.1%" "1.8%"  "0.2%"  "1.7%"  "1.0%"  "0.2%"  "-0.4%"
## [28] "-0.5%" "2.4%"  "1.3%"

 

Seven Day Percentage Changes

# 7 Day Change
# Percentage strings (e.g. "-4.3%") are kept as character.
change_7d <- table %>%
  html_nodes("[class='td-change7d change7d stat-percent text-right col-market']") %>%
  html_text2()

# Preview the first 30 values. head() replaces change_7d[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(change_7d, 30)
##  [1] "-4.3%"  "-4.6%"  "-0.0%"  "-1.3%"  "0.1%"   "3.5%"   "-6.2%"  "-6.9%" 
##  [9] "-13.6%" "-6.8%"  "-4.5%"  "1.4%"   "0.0%"   "0.1%"   "9.0%"   "-4.2%" 
## [17] "-8.6%"  "-5.8%"  "-4.5%"  "-2.9%"  "-0.2%"  "2.3%"   "-9.5%"  "-6.4%" 
## [25] "6.7%"   "-0.9%"  "-4.5%"  "-0.8%"  "7.2%"   "2.7%"

 

24 Hour Volume

# 24 Hour Volume
# Dollar-formatted strings (e.g. "$965,638,462") are kept as character.
volume_24h <- table %>%
  html_nodes("[class='td-liquidity_score lit text-right col-market']") %>%
  html_text2()

# Preview the first 30 values. head() replaces volume_24h[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(volume_24h, 30)
##  [1] "$17,141,078,695" "$9,325,984,551"  "$32,582,688,503" "$965,638,462"   
##  [5] "$3,811,066,232"  "$2,852,868,049"  "$821,577,710"    "$433,333,442"   
##  [9] "$913,540,057"    "$289,038,797"    "$332,613,399"    "$719,948,258"   
## [13] "$2,167,177,182"  "$351,138,325"    "$340,763,112"    "$129,239,837"   
## [17] "$483,480,810"    "$37,843,945"     "$82,009,930"     "$272,650,587"   
## [21] "$137,385,215"    "$533,073,249"    "$277,770,198"    "$313,656,958"   
## [25] "$273,934,238"    "$893,091,206"    "$97,892,343"     "$1,759,785"     
## [29] "$63,039,831"     "$196,606,123"

 

Market Cap

# Market Cap
# Dollar-formatted strings (e.g. "$5,127,308,929") are kept as character.
market_cap <- table %>%
  html_nodes("[class='td-market_cap cap col-market cap-price text-right']") %>%
  html_text2()

# Preview the first 30 values. head() replaces market_cap[0:30]: index 0 is
# silently dropped in R, and head() does not pad short results with NA.
head(market_cap, 30)
##  [1] "$770,303,462,673" "$366,630,945,738" "$82,792,742,856"  "$69,936,419,454" 
##  [5] "$50,058,856,536"  "$37,569,475,140"  "$34,298,774,589"  "$30,635,118,620" 
##  [9] "$29,239,478,367"  "$20,918,338,722"  "$20,388,687,382"  "$19,165,614,641" 
## [13] "$17,448,631,823"  "$17,343,901,926"  "$14,210,233,203"  "$11,265,480,083" 
## [17] "$10,804,641,768"  "$10,389,876,302"  "$10,278,286,691"  "$9,578,568,019"  
## [21] "$8,794,606,672"   "$7,942,675,830"   "$7,104,889,918"   "$6,616,560,903"  
## [25] "$6,513,485,570"   "$6,308,052,481"   "$5,855,713,135"   "$5,457,600,547"  
## [29] "$5,373,766,209"   "$5,127,308,929"

 

Create Dataframe

Once the table parts have been obtained, the dataframe can be assembled with the use of data.frame().

# Put together as dataframe:

# Collect the scraped columns first so their lengths can be compared.
# data.frame() silently recycles a shorter vector whose length divides the
# longest one, which would misalign rows if one selector matched fewer cells.
cg_columns <- list(
  Rank = rank,
  Crypto = crypto_name,
  Ticker = ticker,
  Price = price,
  Change_1h = change_1h,
  Change_24h = change_24h,
  Change_7d = change_7d,
  Volume_24h = volume_24h,
  Market_Cap = market_cap
)
stopifnot(length(unique(lengths(cg_columns))) == 1L)

# stringsAsFactors = FALSE keeps the text columns as character on R < 4.0 too.
cg_data <- data.frame(cg_columns, stringsAsFactors = FALSE)

# First 10 rows of the assembled data frame

head(cg_data, n = 10)
##    Rank    Crypto Ticker      Price Change_1h Change_24h Change_7d
## 1     1   Bitcoin    BTC $40,505.71      0.1%      -0.5%     -4.3%
## 2     2  Ethereum    ETH  $3,045.67      0.1%      -0.1%     -4.6%
## 3     3    Tether   USDT      $1.00     -0.0%      -1.2%     -0.0%
## 4     4       BNB    BNB    $416.17      0.2%      -1.0%     -1.3%
## 5     5  USD Coin   USDC  $0.999188     -0.1%      -1.2%      0.1%
## 6     6       XRP    XRP  $0.781021      0.2%       0.3%      3.5%
## 7     7    Solana    SOL    $102.93      0.6%       1.2%     -6.2%
## 8     8   Cardano    ADA  $0.956710      0.2%       0.0%     -6.9%
## 9     9     Terra   LUNA     $81.51      0.2%       0.4%    -13.6%
## 10   10 Avalanche   AVAX     $77.99      0.5%       2.6%     -6.8%
##         Volume_24h       Market_Cap
## 1  $17,141,078,695 $770,303,462,673
## 2   $9,325,984,551 $366,630,945,738
## 3  $32,582,688,503  $82,792,742,856
## 4     $965,638,462  $69,936,419,454
## 5   $3,811,066,232  $50,058,856,536
## 6   $2,852,868,049  $37,569,475,140
## 7     $821,577,710  $34,298,774,589
## 8     $433,333,442  $30,635,118,620
## 9     $913,540,057  $29,239,478,367
## 10    $289,038,797  $20,918,338,722
# Last 10 rows of the assembled data frame
tail(cg_data, n = 10)
##     Rank                  Crypto Ticker           Price Change_1h Change_24h
## 91    91                  Stacks    STX           $1.18     -0.1%      -1.7%
## 92    92 Synthetix Network Token    SNX           $5.28      0.2%      -6.0%
## 93    93                    Dash   DASH         $108.02      0.6%      -1.5%
## 94    94                Loopring    LRC       $0.902922      1.0%       1.7%
## 95    95   Basic Attention Token    BAT       $0.750673     -0.3%       3.6%
## 96    96                  BitDAO    BIT           $1.22     -0.1%       0.9%
## 97    97           Mina Protocol   MINA           $2.43      1.2%       5.1%
## 98    98                     Amp    AMP $0.022802951520     -0.1%       0.5%
## 99    99              Pax Dollar   USDP       $0.999416     -0.3%      -1.4%
## 100  100               GateToken     GT           $6.46      0.1%      -3.1%
##     Change_7d   Volume_24h     Market_Cap
## 91      -5.4%   $8,466,377 $1,240,403,021
## 92       1.1%  $63,421,108 $1,153,124,426
## 93      -2.6% $159,006,163 $1,152,774,284
## 94      -6.2%  $65,595,451 $1,125,649,097
## 95       1.2% $107,388,722 $1,125,379,206
## 96      -0.5%  $37,169,365 $1,122,926,453
## 97     -24.3%  $48,122,248 $1,120,956,031
## 98      -7.0%   $6,409,967 $1,094,167,291
## 99      -0.1% $469,353,790 $1,005,544,802
## 100     -2.6%   $5,712,497   $980,947,557

 

# Compact summary of the data frame: dimensions plus each column's type
utils::str(cg_data)
## 'data.frame':    100 obs. of  9 variables:
##  $ Rank      : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Crypto    : chr  "Bitcoin" "Ethereum" "Tether" "BNB" ...
##  $ Ticker    : chr  "BTC" "ETH" "USDT" "BNB" ...
##  $ Price     : chr  "$40,505.71" "$3,045.67" "$1.00" "$416.17" ...
##  $ Change_1h : chr  "0.1%" "0.1%" "-0.0%" "0.2%" ...
##  $ Change_24h: chr  "-0.5%" "-0.1%" "-1.2%" "-1.0%" ...
##  $ Change_7d : chr  "-4.3%" "-4.6%" "-0.0%" "-1.3%" ...
##  $ Volume_24h: chr  "$17,141,078,695" "$9,325,984,551" "$32,582,688,503" "$965,638,462" ...
##  $ Market_Cap: chr  "$770,303,462,673" "$366,630,945,738" "$82,792,742,856" "$69,936,419,454" ...

Optionally, you can save the top-100 crypto prices data frame to a .csv file with write.csv().

# Date on which the script is run
Sys.Date()
## [1] "2022-04-16"

 

# Save into .csv file:
# The file name is stamped with the current date, e.g.
# "Coingecko_Prices_2022-04-16.csv", and is written to the working directory.
# paste0() is the idiomatic replacement for paste(..., sep = "").
csv_file <- paste0("Coingecko_Prices_", Sys.Date(), ".csv")

# row.names = FALSE omits R's automatic row-number column from the file.
write.csv(cg_data, csv_file, row.names = FALSE)