-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
191 lines (191 loc) · 7.09 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# Repository setup via usethis: GitHub connection and default-branch rediscovery.
usethis::use_github()
usethis::git_default_branch_rediscover()
# First test case: request five records from the openFDA food enforcement endpoint.
url <- "https://api.fda.gov/food/enforcement.json?limit=5"
con <- httr::GET(url) |> httr::content()
View(con)
# Keep only the `results` element of the parsed response.
con <- con$results
# NOTE(review): rectangularize() is only defined further down in this history,
# so this call presumably failed unless the function was already loaded - confirm.
df <- rectangularize(con)
#' Turn parsed JSON content into a tidy data frame
#'
#' Binds the elements of a parsed JSON list into a data frame and then hands
#' the result to `unnest_recursively()` so that nested list columns are
#' unnested.
#'
#' @param content A list containing JSON data parsed into R format.
#'
#' @return A data frame with JSON data bound and all nested list columns unnested
#'   to achieve a tidy format.
#'
#' @examples
#' # Sample JSON data parsed as an R list
#' content <- list(
#'   list(id = 1, name = "a"),
#'   list(id = 2, name = "b")
#' )
#' rectangularize(content)
#'
#' @export
rectangularize <- function(content) {
  # Stop if content is not a list
  if (!is.list(content)) {
    stop("Error: Please provide content in list format.")
  }

  # vapply() always returns a logical vector, also for an empty list,
  # whereas sapply() would return an empty list in that case.
  is_record <- vapply(content, is.list, logical(1))

  if (all(is_record)) {
    # Every element is itself a list, i.e. one record per element:
    # stack the records row-wise (=> multiple rows df).
    con <- do.call(rbind, content) |> tibble::as_tibble()
  } else {
    # At least one element is not a list: treat `content` as one single
    # record (=> simple rbind; 1 row df).
    con <- rbind(content) |> as.data.frame()
  }

  unnest_recursively(con)
}
#' Recursively unnest all list columns of a data frame
#'
#' List columns whose elements hold at most one entry are flattened with
#' `tidyr::unnest()`. List columns with longer elements are spread into
#' separate columns with `tidyr::unnest_wider()`, and the function is then
#' called again on those spread columns.
#'
#' @param df A data frame or tibble, possibly containing list columns.
#'
#' @return `df` unchanged if it has no list columns. Otherwise a data frame
#'   with the non-list columns first, followed by the flattened list columns
#'   and then the columns obtained from the deeper nested list columns.
#'
#' @examples
#' # Nested list with one record per element
#' l <- list(
#'   list(id = 1, tags = list("a", "b")),
#'   list(id = 2, tags = list("c", "d"))
#' )
#'
#' # Use do.call("rbind") to transform into data.frame
#' df <- do.call("rbind", l) |> as.data.frame()
#'
#' # Unnest recursively:
#' unnested_df <- unnest_recursively(df)
#'
#' @export
unnest_recursively <- function(df) {
  # Separate list columns from non-list columns
  df_l <- df |> dplyr::select_if(is.list)
  df_nl <- df |> dplyr::select_if(~!is.list(.x))

  # Nothing to unnest
  if (ncol(df_l) == 0) {
    return(df)
  }

  # Longest element per list column; guard the zero-row case, where
  # max(integer(0)) would warn and return -Inf.
  max_lengths <- function(x) {
    if (length(x) == 0L) {
      return(0L)
    }
    max(lengths(x))
  }
  v <- vapply(df_l, max_lengths, numeric(1))
  # Columns with more than one entry per element are "deeper nested"
  cols <- names(v[v > 1])

  # Flatten the list columns that are not deeper nested
  df_unnested <- df_l |>
    dplyr::select(-tidyselect::all_of(cols)) |>
    tidyr::unnest(
      cols = tidyselect::everything(),
      keep_empty = TRUE,
      names_repair = "minimal",
      names_sep = "_"
    )

  # Bind with regular columns if any
  if (ncol(df_nl) > 0) {
    df_unnested <- dplyr::bind_cols(df_nl, df_unnested)
  }

  # "list" columns, but not deeper nested: done
  if (length(cols) == 0) {
    return(df_unnested)
  }

  # Spread the deeper nested columns into separate columns ...
  df_remaining_unnested <- df |>
    dplyr::select(tidyselect::all_of(cols)) |>
    tidyr::unnest_wider(
      tidyselect::all_of(cols),
      strict = FALSE,
      names_repair = "minimal",
      names_sep = "_"
    )

  # ... and recur on the result
  dplyr::bind_cols(df_unnested, unnest_recursively(df_remaining_unnested))
}
# Retry on the FDA results now that rectangularize() and unnest_recursively()
# have been defined, then inspect output and input side by side.
df <- rectangularize(con)
View(df)
View(con)
# Next test case: a single MusicBrainz artist record requested as JSON.
url <- "http://musicbrainz.org/ws/2/artist/5b11f4ce-a62d-471e-81fc-a69a8278c7da?fmt=json"
con <- httr::GET(url) |> httr::content()
# NOTE(review): this `$results` subset is discarded by the re-fetch on the next
# line; presumably the response has no `results` element - confirm.
con <- con$results
con <- httr::GET(url) |> httr::content()
View(con)
# Feed the whole parsed response into rectangularize().
df <- rectangularize(con)
View(df)
View(con)
# Crossref test case: request works for two DOIs with `parse = F`.
dois <- c("10.17645/pag.v10i1.4712",
"10.17645/mac.v10i4.5690")
crossref_json <- rcrossref::cr_works(dois = dois, parse = F)
# The same two commands were entered a second time.
dois <- c("10.17645/pag.v10i1.4712",
"10.17645/mac.v10i4.5690")
crossref_json <- rcrossref::cr_works(dois = dois, parse = F)
# Switch to cr_works_() (trailing underscore).
# NOTE(review): presumably chosen to get the unparsed response - confirm
# against the rcrossref documentation.
crossref_json <- rcrossref::cr_works_(dois = dois, parse = F)
View(crossref_json)
df <- rectangularize(crossref_json)
View(df)
# Run jsonlite::validate() on the response and look at it again.
jsonlite::validate(crossref_json)
View(crossref_json)
# University search API, first for the United Kingdom ...
url <- "http://universities.hipolabs.com/search?country=United+Kingdom"
con <- httr::GET(url) |> httr::content()
View(con)
# The parsed response is passed to rectangularize() without subsetting.
df <- rectangularize(con)
View(df)
# ... then the same query for Germany.
url <- "http://universities.hipolabs.com/search?country=Germany"
con <- httr::GET(url) |> httr::content()
df <- rectangularize(con)
View(df)
# NOTE(review): `url2` is only assigned on the following line, so this first
# call presumably failed unless `url2` already existed in the session - confirm.
con2 <- httr::GET(url2) |> httr::content()
# Wayback Machine availability lookup for google.com.
url2 <- "https://archive.org/wayback/available?url=google.com"
con2 <- httr::GET(url2) |> httr::content()
View(con2)
# Keep only the `archived_snapshots` element before rectangularizing.
con2 <- con2$archived_snapshots
df2 <- rectangularize(con2)
View(df2)
# USGS earthquake query in GeoJSON format (2023-03-01 to 2023-03-02, minmagnitude=5).
url3 <- "https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-03-01&endtime=2023-03-02&minmagnitude=5"
con3 <- httr::GET(url3) |> httr::content()
View(con3)
# First attempt: rectangularize the whole response.
df3 <- rectangularize(con3)
View(df3)
View(con3)
# Second attempt: keep only the `features` element and rectangularize that.
con3 <- con3$features
df3 <- rectangularize(con3)
View(df3)
# Experiment with extracting the body unparsed and parsing it with
# jsonlite::parse_json() instead of relying on httr::content()'s default.
con <- httr::GET(url) |> httr::content(as = "raw") |> jsonlite::parse_json()
url <- "http://universities.hipolabs.com/search?country=Germany"
# Successive variants tried: as = "raw", as = "text", and as = "text" with an
# explicit UTF-8 encoding.
con <- httr::GET(url) |> httr::content(as = "raw") |> jsonlite::parse_json()
con <- httr::GET(url) |> httr::content(as = "text") |> jsonlite::parse_json()
con <- httr::GET(url) |> httr::content(as = "text", encoding = "UTF-8") |> jsonlite::parse_json()
# Subsetting to `$results` is left commented out for this response.
#con <- con$results
df <- rectangularize(con)
View(df)
# --- Walk-through example 1 (several commands below were entered twice) ---
# In the first example we use the "earthquake.usgs.gov" API to get data about some earthquakes:
url <- "https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime=2023-03-01&endtime=2023-03-02&minmagnitude=5"
# Call the API and extract the content using httr
con <- httr::GET(url) |> httr::content()
# Call the API and extract the content using httr
con <- httr::GET(url) |> httr::content()
# Note that httr::content already parses the JSON data into a "list" R-object.
# Often, APIs return a number of things. Pick the content that interests you:
content <- con$features
# Feed the list object into rectangularize() and voila:
df <- rectangularize(content)
# Feed the list object into rectangularize() and voila:
df <- rectangularize(content)
# Print a compact overview of the resulting data frame.
dplyr::glimpse(df)
# --- Walk-through example 2 (several commands below were entered twice) ---
# Let's get some information about German universities:
url_2 <- "http://universities.hipolabs.com/search?country=Germany"
# Let's use this example to demonstrate how to deal with JSON data that is plain text:
con_2 <- httr::GET(url_2) |> httr::content(as = "text", encoding = "UTF-8")
# Let's use this example to demonstrate how to deal with JSON data that is plain text:
con_2 <- httr::GET(url_2) |> httr::content(as = "text", encoding = "UTF-8")
# Use jsonlite::parse_json() to parse text as an R 'list' object:
con_2 <- con_2 |> jsonlite::parse_json()
# Here, we can use the output right away for rectangularize():
df_2 <- rectangularize(con_2)
# Print a compact overview of the resulting data frame.
dplyr::glimpse(df_2)
# Package housekeeping: register httr and jsonlite under "Suggests".
usethis::use_package("httr", type = "Suggests")
usethis::use_package("jsonlite", type = "Suggests")
# Add the package logo from a local file (machine-specific absolute path).
usethis::use_logo("C:/nextcloud/Dissertation/3_Projects/fleece_prepare/fleece_logo.png", geometry = "528x567", retina = T)
devtools::build_readme()
# Remove every object from the workspace, then rebuild the README
# (the rebuild was entered twice).
rm(list = ls())
devtools::build_readme()
devtools::build_readme()
# Load the package under development into the session.
devtools::load_all()
# --- Walk-through example 2, run again after devtools::load_all() ---
# Let's get some information about German universities:
url_2 <- "http://universities.hipolabs.com/search?country=Germany"
# Let's use this example to demonstrate how to deal with JSON data that is plain text:
con_2 <- httr::GET(url_2) |> httr::content(as = "text", encoding = "UTF-8")
# Let's use this example to demonstrate how to deal with JSON data that is plain text:
con_2 <- httr::GET(url_2) |> httr::content(as = "text", encoding = "UTF-8")
# Use jsonlite::parse_json() to parse text as an R 'list' object:
con_2 <- con_2 |> jsonlite::parse_json()
# Here, we can use the output right away for rectangularize():
df_2 <- rectangularize(con_2)
# Inspect the result in the viewer and as a compact overview.
View(df_2)
dplyr::glimpse(df_2)
# Rebuild the README.
devtools::build_readme()
# Look at the last recorded error.
# NOTE(review): presumably raised by the build_readme() call above - confirm.
.Last.error
devtools::check()
# Set a CC BY license for the package.
usethis::use_ccby_license()
# Repeated README rebuilds and package checks.
devtools::build_readme()
devtools::build_readme()
devtools::build_readme()
devtools::check()
devtools::check()