2019-05
ggplot2
in R
Bar
Line
Histogram
Point
Heatmap
aes(thetic)
geom(etric)
scale
stat(istics)
coord(inate)
facet
theme
ggplot(data=..., aes(x=..., y=...)) + geom_xxx(...) + stat_xxx(...) + facet_xxx(...) + ...
ggplot
描述 data 從哪來aes
描述圖上的元素跟 data 之類的對應關係geom_xxx
描述要畫圖的類型及相關調整的參數geom_bar
, geom_line
, geom_point
, …library(ggplot2) # list all geom ls(pattern = '^geom_', env = as.environment('package:ggplot2'))
[1] "geom_abline" "geom_area" "geom_bar" [4] "geom_bin2d" "geom_blank" "geom_boxplot" [7] "geom_col" "geom_contour" "geom_count" [10] "geom_crossbar" "geom_curve" "geom_density" [13] "geom_density_2d" "geom_density2d" "geom_dotplot" [16] "geom_errorbar" "geom_errorbarh" "geom_freqpoly" [19] "geom_hex" "geom_histogram" "geom_hline" [22] "geom_jitter" "geom_label" "geom_line" [25] "geom_linerange" "geom_map" "geom_path" [28] "geom_point" "geom_pointrange" "geom_polygon" [31] "geom_qq" "geom_qq_line" "geom_quantile" [34] "geom_raster" "geom_rect" "geom_ribbon" [37] "geom_rug" "geom_segment" "geom_sf" [40] "geom_sf_label" "geom_sf_text" "geom_smooth" [43] "geom_spoke" "geom_step" "geom_text" [46] "geom_tile" "geom_violin" "geom_vline"
data.frame
儲存資料 (不可以丟 matrix 物件)wide format
to long format
install.packages(c("tidyr","dplyr","ggplot2"))
library
才能使用喲!)library(ggplot2) library(dplyr) library(tidyr)
or
install.packages("tidyverse")
library
才能使用喲!)library(tidyverse)
############### 相對路徑 ############### # 瞭解現在我們所處在的路徑 getwd() # 設定我們檔案存放的路徑 setwd() # 讀檔起手式 data <- read.csv("transaction.csv") # 若讀入的是亂碼,試試以下 data <- read.csv("transaction.csv",fileEncoding = 'big5') #如果你是mac data <- read.csv("transaction.csv",fileEncoding = 'utf-8') #如果你是windows
area_land | price_total | build_type | area_park | area_build | trac_year |
---|---|---|---|---|---|
35.0 | 6380000 | 公寓(5樓含以下無電梯) | 0.0 | 61 | 102 |
10.7 | 12010000 | 住宅大樓(11層含以上有電梯) | 0.0 | 104 | 102 |
8.5 | 10080000 | 套房(1房1廳1衛) | 8.6 | 52 | 102 |
4.7 | 4600000 | 住宅大樓(11層含以上有電梯) | 0.0 | 39 | 102 |
31.0 | 23800000 | 華廈(10層含以下有電梯) | 0.0 | 185 | 102 |
英文欄位名稱 | 中文欄位名稱 |
---|---|
city | 縣市 |
district | 鄉鎮市區 |
trac_year | 交易年份 |
trac_month | 交易月份 |
trac_type | 交易標的 |
trac_content | 交易筆棟數 |
use_type | 使用分區或編定 |
英文欄位名稱 | 中文欄位名稱 |
---|---|
build_type | 建物型態 |
build_ymd | 建築完成年月 |
area_land | 土地移轉總面積.平方公尺. |
area_build | 建物移轉總面積.平方公尺. |
area_park | 車位移轉總面積.平方公尺. |
price_total | 總價.元. |
price_unit | 單價.元.平方公尺. |
str(data)
'data.frame': 153598 obs. of 15 variables: $ city : Factor w/ 4 levels "高雄市","臺北市",..: 2 2 2 2 2 2 2 2 2 2 ... $ district : Factor w/ 99 levels "阿蓮區","八里區",..: 63 96 96 5 96 5 5 38 38 42 ... $ trac_year : int 102 102 102 102 102 102 102 102 102 102 ... $ trac_month : Factor w/ 12 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ... $ trac_type : Factor w/ 2 levels "房地(土地+建物)",..: 1 1 2 1 1 2 2 2 2 2 ... $ trac_content: Factor w/ 327 levels "土地0建物0車位0",..: 57 57 58 57 57 62 62 58 64 167 ... $ use_type : Factor w/ 5 levels "工","農","其他",..: 5 4 4 4 5 5 5 5 1 5 ... $ build_type : Factor w/ 12 levels "辦公商業大樓",..: 6 12 10 12 7 7 7 12 1 12 ... $ build_ymd : int 701109 701228 970114 851218 970624 1010724 1010724 1000414 1010531 870910 ... $ area_land : num 34.96 10.71 8.51 4.7 30.97 ... $ area_build : num 60.6 104.5 51.9 39.4 185.2 ... $ area_park : num 0 0 8.55 0 0 ... $ price_total : num 6380000 12010000 10080000 4600000 23800000 ... $ price_unit : int 105263 114928 194070 116900 128510 218147 204716 174613 133648 27658 ... $ age : int 32 32 5 17 5 1 1 2 1 15 ...
summary(data)
city district trac_year trac_month 高雄市:34460 淡水區 : 7172 Min. :102 12 :15206 臺北市:24238 西屯區 : 5974 1st Qu.:102 5 :15079 臺中市:37482 新莊區 : 5955 Median :102 4 :14682 新北市:57418 北屯區 : 5881 Mean :102 3 :14523 新店區 : 5873 3rd Qu.:102 7 :13805 中和區 : 5719 Max. :102 6 :13392 (Other):117024 (Other):66911 trac_type trac_content use_type 房地(土地+建物) :91613 土地1建物1車位0:66792 工 : 3233 房地(土地+建物)+車位:61985 土地1建物1車位1:41031 農 : 577 土地2建物1車位0:14537 其他: 8206 土地1建物1車位2: 7195 商 : 26205 土地2建物1車位1: 4787 住 :115377 土地3建物1車位0: 4691 (Other) :14565 build_type build_ymd area_land 住宅大樓(11層含以上有電梯):70725 Min. : 100602 Min. : 0 公寓(5樓含以下無電梯) :23211 1st Qu.: 780326 1st Qu.: 13 透天厝 :21954 Median : 870506 Median : 22 華廈(10層含以下有電梯) :20365 Mean : 868754 Mean : 42 套房(1房1廳1衛) : 9709 3rd Qu.: 991201 3rd Qu.: 36 店面(店鋪) : 2888 Max. :1030313 Max. :127088 (Other) : 4746 area_build area_park price_total price_unit Min. : 0 Min. : 0 Min. : 0 Min. : 0 1st Qu.: 85 1st Qu.: 0 1st Qu.: 4900000 1st Qu.: 42685 Median : 124 Median : 0 Median : 8400000 Median : 67880 Mean : 153 Mean : 25 Mean : 12879580 Mean : 86176 3rd Qu.: 179 3rd Qu.: 9 3rd Qu.: 14580000 3rd Qu.: 111173 Max. :79669 Max. :2400000 Max. :8800000000 Max. :4284119 NA's :461 age Min. :-1 1st Qu.: 3 Median :15 Mean :15 3rd Qu.:24 Max. :92
geom_bar
比較
看看2013年在各縣市的案件交易量thm <- function() theme(text = element_text(size = 15, family = "SimHei")) # 控制字體與大小 # SimHei是只有Mac才有的字體, 用來解決Mac系統中文顯示錯誤的問題 # Windows系統使用者請忽略 `+ thm()` 指令 data %>% ggplot(aes(x = city)) + geom_bar(stat = "count") + thm() # stat = "count" 算個數
data %>% ggplot(aes(x = city)) + geom_bar(stat = "count") + thm() + labs(title = "各縣市交易量", x = "縣市", y = "交易量") # lab用來幫圖形的標題、x軸與y軸做命名
color
vs fill
?data %>% ggplot(aes(x = city)) + geom_bar(stat = "count") + thm() + labs(title = "各縣市交易量", x = "縣市", y = "交易量") + geom_bar(fill = 'snow', color = 'black') # see colors() if you're picky
比較
臺北市各行政區的案件交易量# 資料整理 table = data %>% filter(city == "臺北市") %>% group_by(district) %>% summarise(Count = n()) # dplyr::n 用來計數 table
# A tibble: 12 x 2 district Count <fct> <int> 1 北投區 2416 2 大安區 1755 3 大同區 1092 4 南港區 989 5 內湖區 3598 6 士林區 1859 7 松山區 1556 8 萬華區 1584 9 文山區 2197 10 信義區 1585 11 中山區 4410 12 中正區 1197
table %>% ggplot(aes(x = district, y = Count)) + geom_bar(stat = "identity") + thm() # stat='identity'以表格的值做為bar的高度
# Bar chart of the per-district counts held in `table`, with the x axis
# ordered by descending Count via reorder(district, -Count).
table %>%
  ggplot(aes(x = reorder(district, -Count), y = Count)) +
  geom_bar(stat = "identity") +
  thm() +
  # The plot-title argument of labs() is `title`; the previous spelling
  # `titles` did not match it, so no title was drawn.
  labs(title = "臺北市各行政區交易量", x = "行政區", y = "Count")
比較
臺北市&高雄市的各使用分區或編定(use_type
)所佔比例# A tibble: 10 x 4 # Groups: city [2] city use_type Count rate <fct> <fct> <int> <dbl> 1 高雄市 工 196 0.01 2 高雄市 農 123 0 3 高雄市 其他 532 0.02 4 高雄市 商 7582 0.22 5 高雄市 住 26027 0.76 6 臺北市 工 442 0.02 7 臺北市 農 3 0 8 臺北市 其他 1709 0.07 9 臺北市 商 7323 0.3 10 臺北市 住 14761 0.61
table = data %>% filter(city == "臺北市" | city == "高雄市" ) %>% group_by(city, use_type) %>% summarise(Count = n()) %>% mutate(rate = round(Count/sum(Count), 2))
stack
table %>% ggplot(aes(x = city, y = Count, fill = use_type)) + geom_bar(stat = 'identity', position = 'stack') + thm() # stack類別堆疊
dodge
# Side-by-side bars of `rate` per use_type within each city.
# Each step sits on its own line so the inline comments no longer swallow
# the rest of the pipeline.
table %>%
  ggplot(aes(x = city, y = rate, fill = use_type)) +
  geom_bar(stat = "identity", position = "dodge") + # dodge: categories side by side
  thm() +
  scale_fill_discrete(name = "使用分區或編定") # controls how the legend is shown
geom_line
# Count the rows of `data` per trac_month, then print the result.
# The assignment and the print are separate statements and must not share
# a line without a separator.
table <- data %>%
  group_by(trac_month) %>%
  summarise(Count = n())
table
# A tibble: 12 x 2 trac_month Count <fct> <int> 1 1 10367 2 2 7871 3 3 14523 4 4 14682 5 5 15079 6 6 13392 7 7 13805 8 8 11714 9 9 10814 10 10 13166 11 11 12979 12 12 15206
table %>% ggplot(aes(x = trac_month, y = Count, group = 1)) + geom_line() + thm() + labs(x = "交易月份" , y = "交易數量")
Multiple
Linetable = data %>% group_by(city, trac_month) %>% summarise(Count = n()) table
# A tibble: 48 x 3 # Groups: city [?] city trac_month Count <fct> <fct> <int> 1 高雄市 1 2511 2 高雄市 2 1897 3 高雄市 3 3355 4 高雄市 4 3524 5 高雄市 5 3378 6 高雄市 6 3097 7 高雄市 7 3063 8 高雄市 8 2573 9 高雄市 9 2354 10 高雄市 10 2590 # ... with 38 more rows
Multiple
Linetable %>% ggplot(aes(x = trac_month, y = Count, group = city, color = city)) + geom_line() + geom_point() + thm() + labs(x = "交易月份" , y = "交易數量")
Multiple
Line plotdata %>% filter(city == "臺北市" | city == "高雄市" ) %>% group_by(city, age) %>% summarise(Count = n()) %>% ggplot(aes(x = age, y = Count, group = city, color = city)) + geom_line() + geom_point() + thm()
不合理
data %>% filter(city == "臺北市" | city == "高雄市" ) %>% group_by(city, age) %>% summarise(Count = n()) %>% ggplot(aes(x = age, y = Count, group = city, color = city)) + geom_line() + geom_point() + thm() + coord_cartesian(xlim = c(-3, 10))
geom_histogram
data %>% ggplot(aes(x = age, y =..count..)) + geom_histogram()
aes(y=..count..)
vs. aes(y=..density..)
data %>% ggplot(aes(x = age, y =..density..)) + geom_histogram()
# Density-scaled histogram of age with bars filled by their bin count.
# NOTE(review): newer ggplot2 releases appear to prefer after_stat(density) /
# after_stat(count) over the ..name.. notation; confirm against the
# installed version before switching.
data %>%
  ggplot(aes(x = age, y = ..density.., fill = ..count..)) + # fill: colour by the given variable
  geom_histogram(binwidth = .5)
geom_histogram()
+ geom_density()
data %>% ggplot(aes(x = age, y = ..density..)) + geom_histogram(color = "black", fill = "white") + geom_density(alpha = .2, fill = "#FF6666") # alpha設定透明度
geom_point
iris %>% ggplot(aes(x = Sepal.Length, y = Petal.Length, color = Species)) + geom_point(shape = 1, size = 2) # shape控制圖示;size控制點的大小
ggplot2
# Scatter plot of iris where colour, shape and size all vary by Species,
# with the shape and size values chosen by hand.
iris %>%
  ggplot(aes(
    x = Sepal.Length, y = Petal.Length,
    color = Species, shape = Species, size = Species
  )) +
  geom_point() +
  scale_shape_manual(values = c(1, 5, 7)) + # glyph used for each Species
  scale_size_manual(values = c(1, 2, 3)) # point size used for each Species
install.packages("devtools")
# NOTE(review): ggthemr is presumably installed from GitHub via devtools
# (hence the install.packages("devtools") step) - confirm the source repo.
library(ggthemr)
ggthemr('flat')

# Jittered scatter of unit price against building age for studio units
# ("套房") in three districts, coloured by district.
ans <- data %>%
  filter(district == "文山區" | district == "士林區" | district == "信義區") %>%
  filter(build_type == "套房(1房1廳1衛)") %>%
  ggplot(aes(x = age, y = price_unit, color = district)) +
  geom_point(position = "jitter") +
  thm()
ans
# Keep the first four columns of iris.
# The comment now sits on its own line; previously the leading `#` turned
# the assignment and the head() call into comment text as well.
dat <- iris %>% select(1:4)
dat %>% head()
Sepal.Length Sepal.Width Petal.Length Petal.Width 1 5.1 3.5 1.4 0.2 2 4.9 3.0 1.4 0.2 3 4.7 3.2 1.3 0.2 4 4.6 3.1 1.5 0.2 5 5.0 3.6 1.4 0.2 6 5.4 3.9 1.7 0.4
寬轉長
計算相關性p = dat %>% cor() %>% as.data.frame() %>% mutate(names = row.names(.)) %>% gather(class, cor, 1:4) p %>% head(10)
names class cor 1 Sepal.Length Sepal.Length 1.00 2 Sepal.Width Sepal.Length -0.12 3 Petal.Length Sepal.Length 0.87 4 Petal.Width Sepal.Length 0.82 5 Sepal.Length Sepal.Width -0.12 6 Sepal.Width Sepal.Width 1.00 7 Petal.Length Sepal.Width -0.43 8 Petal.Width Sepal.Width -0.37 9 Sepal.Length Petal.Length 0.87 10 Sepal.Width Petal.Length -0.43
p %>% ggplot(aes(x = names, y = class, fill= cor)) + geom_tile() + labs(x = "", y = "") + scale_fill_gradient2(limits = c(-1, 1))
stat
isticslibrary(ggthemr) ggthemr('flat') iris %>% ggplot(aes(x = Sepal.Length, y = Petal.Length)) + geom_point() + stat_smooth(method = lm, level = .95) # add se=FALSE to disable CI
facet
# Row counts per (city, trac_month), then a bar chart of those totals with
# bars filled by city.
table <- data %>%
  group_by(city, trac_month) %>% # group by city and transaction month
  summarise(total = n()) # number of rows in each group

table %>%
  ggplot(aes(x = trac_month, y = total, fill = city)) +
  geom_bar(stat = "identity") +
  thm()
facet
table %>% ggplot(aes(x = trac_month, y = total ,fill = city))+ geom_bar(stat = 'identity') + thm() + facet_wrap( ~city , nrow = 2) + labs(x = "交易月份", y="交易量")
coord
_flip()table %>% ggplot(aes(x = trac_month, y = total, fill = city)) + geom_bar(stat = 'identity') + thm() + facet_wrap( ~ city, nrow = 2)+ labs(x = "交易月份", y = "交易量") + coord_flip()
# Three separate ggsave() calls, one per line (they cannot share a line
# without a separator).
# '檔案名稱' is presumably a placeholder for the output file name - replace
# it with a real path before running.
ggsave('檔案名稱')
ggsave("plot.pdf", width = 4, height = 4)
ggsave("plot.png", width = 4, height = 4, dpi = 300)
Plotly
Python
、R
、Matlab
、…等等CRAN
內下載# Plotly is now on CRAN! install.packages("plotly") # install the latest development version (on GitHub) via devtools
或是
從github
上下載,但前提是先
安裝devtools
# install.packages("devtools") devtools::install_github("ropensci/plotly")
library
!library(plotly)
ggplot2
# Build the jittered scatter (unit price vs. building age for studio units
# in three districts, coloured by district), store it in `ans`, then print.
# The assignment and the print are separate statements on separate lines.
ans <- data %>%
  filter(district == "文山區" | district == "士林區" | district == "信義區") %>%
  filter(build_type == "套房(1房1廳1衛)") %>%
  ggplot(aes(x = age, y = price_unit, color = district)) +
  geom_point(position = "jitter") +
  thm()
ans
ggplotly
& Resize
ggplotly(ans, height = 400, width = 1000)
了解自己的需求,詢問關鍵字與函數
歡迎來信 leslie.li@dsp.im 與我一起交流!
03-RVisualization-03-ggplot2
03-RVisualization-01-One-Variable-Visualization
03-RVisualization-02-Multiple-Variables-Visualization
03-RVisualization-04-Javascript-And-Maps