This blog has relocated to https://coolbutuseless.github.io and associated packages are now hosted at https://github.com/coolbutuseless.

29 April 2018

mikefc

Comparison of R object encoding with YAML, jsonlite and RJSONIO

As described in a prior post I need to save some configuration objects from R into human-readable, human-editable format.

I’d previously decided that YAML was the best for my purposes, but I recently discovered that YAML and jsonlite won’t keep the names of a named vector, whereas RJSONIO will.

In response to this discovery, I’ve decided to check a large number of standard R object types output by the three packages (yaml, jsonlite and RJSONIO) to get a better picture of their strengths and weaknesses.

Below, I set up a list of all the objects I want to test, then have each package encode and decode every object and compare the decoded result to the original.

Set up the format specification and the list of R objects to test

#-----------------------------------------------------------------------------
# The 3 serialisation packages under test.
# Each entry pairs a package's encoder (R object -> text) with its decoder
# (text -> R object); entries are looked up by package name.
#-----------------------------------------------------------------------------
format <- list(
  jsonlite = list(encode = jsonlite::toJSON, decode = jsonlite::fromJSON),
  RJSONIO  = list(encode = RJSONIO::toJSON,  decode = RJSONIO::fromJSON),
  yaml     = list(encode = yaml::as.yaml,    decode = yaml::yaml.load)
)


#-----------------------------------------------------------------------------
# Named collection of the R objects used as round-trip fixtures.
# The element name is the label reported in the results tables; the order
# here is the order in which the objects are tested.
#-----------------------------------------------------------------------------
r_objects <- list(

  # NULL, lists and the typed missing values
  null         = NULL,
  empty_list   = list(),
  list         = list(a = 1, b = "hello"),
  `NA`         = NA,
  numeric_NA   = NA_real_,
  integer_NA   = NA_integer_,
  character_NA = NA_character_,

  # length-one atomic values
  boolean   = TRUE,
  numeric   = 12.3,
  integer   = 1L,
  character = "hello",

  # unnamed atomic vectors
  boolean_vec   = c(TRUE, FALSE, FALSE),
  numeric_vec   = c(1.23, 4.56),
  integer_vec   = c(1L, 5L, 3L),
  character_vec = c("a", "b", "c"),

  # named atomic vectors
  boolean_named_vec   = c(a = TRUE, b = FALSE, c = FALSE),
  numeric_named_vec   = c(a = 1.23, b = 4.56),
  integer_named_vec   = c(a = 1L, b = 5L, c = 3L),
  character_named_vec = c(a = "a", b = "b", c = "c"),

  # atomic vectors containing a missing value
  boolean_vec_with_na   = c(TRUE, FALSE, NA),
  numeric_vec_with_na   = c(1.23, NA),
  integer_vec_with_na   = c(1L, 2L, NA),
  character_vec_with_na = c("a", "b", NA),

  # unnamed lists holding a single type
  boolean_list   = list(TRUE, FALSE, FALSE),
  numeric_list   = list(1.23, 4.56),
  integer_list   = list(1L, 5L, 3L),
  character_list = list("a", "b", "c"),

  # unnamed lists ending in a NULL element
  boolean_list_with_null   = list(TRUE, FALSE, NULL),
  numeric_list_with_null   = list(1.23, NULL),
  integer_list_with_null   = list(1L, 5L, NULL),
  character_list_with_null = list("a", "b", NULL),

  # named lists holding a single type
  boolean_named_list   = list(a = TRUE, b = FALSE, c = FALSE),
  numeric_named_list   = list(a = 1.23, b = 4.56),
  integer_named_list   = list(a = 1L, b = 5L, c = 3L),
  character_named_list = list(a = "a", b = "b", c = "c"),

  # 2x2 matrices filled with a single value
  boolean_matrix   = matrix(TRUE, nrow = 2, ncol = 2),
  numeric_matrix   = matrix(1.2, nrow = 2, ncol = 2),
  integer_matrix   = matrix(1L, nrow = 2, ncol = 2),
  character_matrix = matrix("a", nrow = 2, ncol = 2),

  # one-row data frame mixing integer, double and character columns
  data.frame = data.frame(a = 1L, b = 2.1, c = "c", stringsAsFactors = FALSE)
)

Test each format encoding/decoding each R object

#-----------------------------------------------------------------------------
# Capture a string representation of an object
#
# x: any R object.
#
# Returns a single character string holding the deparsed form of `x`.
# Integers keep their `L` suffix and typed NAs (e.g. NA_real_) are kept
# distinct, because only the "keepInteger" and "keepNA" options are enabled.
#
# deparse() splits long expressions over several strings; those pieces are
# joined here so the result is always length one and fits in one table cell.
#-----------------------------------------------------------------------------
cap <- function(x) {
  pieces <- deparse(x, control = c("keepInteger", "keepNA"))
  paste(pieces, collapse = " ")
}


#-----------------------------------------------------------------------------
# Tidy an encoded string so it can be shown in a single table cell
#
# x: character vector of encoder output.
#
# Steps:
#   1. Drop any line consisting solely of '...'. This is presumably the YAML
#      document-end marker - TODO confirm against yaml::as.yaml output.
#      Dots inside the encoded data itself (e.g. "wait...") are left alone.
#   2. Trim leading/trailing whitespace.
#   3. Replace each newline with two backslashes followed by 'n'.
#
# Returns a character vector the same length as `x`.
#-----------------------------------------------------------------------------
sanitize <- function(x) {
  # Keep the leading newline (if any) so neighbouring lines stay separated
  x <- gsub("(^|\n)\\.\\.\\.(\n|$)", "\\1", x, perl = TRUE)
  x <- trimws(x)
  gsub("\n", "\\\\\\\\n", x)
}

#-----------------------------------------------------------------------------
# encode/decode the given r object using the given format
#
# format_name:   name of an entry in the global `format` list.
# r_object_name: name of an entry in the global `r_objects` list.
#
# Returns a one-row tibble holding the format and object names, the deparsed
# original and decoded objects, their classes, whether the decoded object is
# identical() to the original, and the sanitized encoded text.
#
# Relies on `format`, `r_objects`, `cap()` and `sanitize()` defined elsewhere
# in this file.
#-----------------------------------------------------------------------------
test_encode_decode <- function(format_name, r_object_name) {
  codec <- format[[format_name]]

  original_object <- r_objects[[r_object_name]]
  encoded_object  <- codec$encode(original_object)
  decoded_object  <- codec$decode(encoded_object)

  # class() and deparsed text may have length > 1 (e.g. a matrix has class
  # c("matrix", "array") on R >= 4.0). Collapse them so that every test
  # yields exactly one row instead of being recycled into duplicates.
  as_one_string <- function(x, sep) paste(x, collapse = sep)

  dplyr::tibble(
    format                 = format_name,
    name                   = r_object_name,
    original               = as_one_string(cap(original_object), " "),
    decoded                = as_one_string(cap(decoded_object), " "),
    original_class         = as_one_string(class(original_object), "/"),
    decoded_class          = as_one_string(class(decoded_object), "/"),
    identical              = identical(original_object, decoded_object),
    encoded_representation = sanitize(encoded_object)
  )
}


#-----------------------------------------------------------------------------
# Generate all results
#
# Run every R object through each format (yaml, then jsonlite, then RJSONIO),
# stack the per-format data frames, then sort by object and format. `name`
# becomes a factor whose levels follow first appearance, so sorting keeps the
# objects in their original test order.
#-----------------------------------------------------------------------------
all_results <- c("yaml", "jsonlite", "RJSONIO") %>%
  purrr::map(function(format_name) {
    names(r_objects) %>%
      purrr::map(~test_encode_decode(format_name, .x)) %>%
      bind_rows() %>%
      as.data.frame()
  }) %>%
  dplyr::bind_rows()

all_results <- all_results %>%
  mutate(name = factor(name, levels = unique(name))) %>%
  arrange(name, format)

Results

The list of all results is at the end of this post in the appendix.

On a raw count, yaml has the largest number of decoded objects identical to the original.

Table 1: Count of identical decoded objects by format
format FALSE TRUE
jsonlite 17 23
RJSONIO 27 13
yaml 13 27
Table 2: Comparison of formats: was the decoded object using this package identical to the original
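| name | jsonlite | RJSONIO | yaml |
|---|---|---|---|
| null |  |  | Y |
| empty_list | Y |  | Y |
| list |  | Y | Y |
| NA | Y |  | Y |
| numeric_NA |  |  | Y |
| integer_NA |  |  | Y |
| character_NA |  |  | Y |
| boolean | Y | Y | Y |
| numeric | Y | Y | Y |
| integer | Y |  | Y |
| character | Y | Y | Y |
| boolean_vec | Y | Y | Y |
| numeric_vec | Y | Y | Y |
| integer_vec | Y |  | Y |
| character_vec | Y | Y | Y |
| boolean_named_vec |  | Y |  |
| numeric_named_vec |  | Y |  |
| integer_named_vec |  |  |  |
| character_named_vec |  | Y |  |
| boolean_vec_with_na | Y |  | Y |
| numeric_vec_with_na | Y |  | Y |
| integer_vec_with_na | Y |  | Y |
| character_vec_with_na | Y |  | Y |
| boolean_list |  |  |  |
| numeric_list |  |  |  |
| integer_list |  |  |  |
| character_list |  |  |  |
| boolean_list_with_null |  | Y | Y |
| numeric_list_with_null |  | Y | Y |
| integer_list_with_null |  |  | Y |
| character_list_with_null |  | Y | Y |
| boolean_named_list | Y |  | Y |
| numeric_named_list | Y |  | Y |
| integer_named_list | Y |  | Y |
| character_named_list | Y |  | Y |
| boolean_matrix | Y |  |  |
| numeric_matrix | Y |  |  |
| integer_matrix | Y |  |  |
| character_matrix | Y |  |  |
| data.frame | Y |  |  |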

Notes

  • YAML seems to have the best support for the basic types (i.e. the first 11 R objects tested, from null through character)
  • All formats convert an unnamed list of a single type (e.g. all numerics) into a vector
  • Named vectors are only supported by RJSONIO (although it decodes integers as numeric, so integer_named_vec still fails the identical check)
  • Only jsonlite seems to support matrices

Conclusion?

It depends.

YAML is still the best for my purposes.

Appendix: All results

# Render every encode/decode result as one captioned table
knitr::kable(x = all_results, caption = "All results")
Table 3: All results
format name original decoded original_class decoded_class identical encoded_representation
jsonlite null NULL list() NULL list FALSE {}
RJSONIO null NULL list(NULL) NULL list FALSE [ null ]
yaml null NULL NULL NULL NULL TRUE ~
jsonlite empty_list list() list() list list TRUE []
RJSONIO empty_list list() list() list AsIs FALSE []
yaml empty_list list() list() list list TRUE []
jsonlite list list(a = 1, b = “hello”) list(a = 1L, b = “hello”) list list FALSE {“a”:[1],“b”:[“hello”]}
RJSONIO list list(a = 1, b = “hello”) list(a = 1, b = “hello”) list list TRUE {\n “a”: 1,\n“b”: “hello” \n}
yaml list list(a = 1, b = “hello”) list(a = 1, b = “hello”) list list TRUE a: 1.0\nb: hello
jsonlite NA NA NA logical logical TRUE [null]
RJSONIO NA NA list(NULL) logical list FALSE [ null ]
yaml NA NA NA logical logical TRUE .na
jsonlite numeric_NA NA_real_ NA numeric logical FALSE [“NA”]
RJSONIO numeric_NA NA_real_ list(NULL) numeric list FALSE [ null ]
yaml numeric_NA NA_real_ NA_real_ numeric numeric TRUE .na.real
jsonlite integer_NA NA_integer_ NA integer logical FALSE [“NA”]
RJSONIO integer_NA NA_integer_ list(NULL) integer list FALSE [ null ]
yaml integer_NA NA_integer_ NA_integer_ integer integer TRUE .na.integer
jsonlite character_NA NA_character_ NA character logical FALSE [null]
RJSONIO character_NA NA_character_ list(NULL) character list FALSE [ null ]
yaml character_NA NA_character_ NA_character_ character character TRUE .na.character
jsonlite boolean TRUE TRUE logical logical TRUE [true]
RJSONIO boolean TRUE TRUE logical logical TRUE [ true ]
yaml boolean TRUE TRUE logical logical TRUE yes
jsonlite numeric 12.3 12.3 numeric numeric TRUE [12.3]
RJSONIO numeric 12.3 12.3 numeric numeric TRUE [ 12.3 ]
yaml numeric 12.3 12.3 numeric numeric TRUE 12.3
jsonlite integer 1L 1L integer integer TRUE [1]
RJSONIO integer 1L 1 integer numeric FALSE [ 1 ]
yaml integer 1L 1L integer integer TRUE 1
jsonlite character “hello” “hello” character character TRUE [“hello”]
RJSONIO character “hello” “hello” character character TRUE [ “hello” ]
yaml character “hello” “hello” character character TRUE hello
jsonlite boolean_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical TRUE [true,false,false]
RJSONIO boolean_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical TRUE [ true, false, false ]
yaml boolean_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical TRUE - yes\n- no\n- no
jsonlite numeric_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric TRUE [1.23,4.56]
RJSONIO numeric_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric TRUE [ 1.23, 4.56 ]
yaml numeric_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric TRUE - 1.23\n- 4.56
jsonlite integer_vec c(1L, 5L, 3L) c(1L, 5L, 3L) integer integer TRUE [1,5,3]
RJSONIO integer_vec c(1L, 5L, 3L) c(1, 5, 3) integer numeric FALSE [ 1, 5, 3 ]
yaml integer_vec c(1L, 5L, 3L) c(1L, 5L, 3L) integer integer TRUE - 1\n- 5\n- 3
jsonlite character_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character TRUE [“a”,“b”,“c”]
RJSONIO character_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character TRUE [ “a”, “b”, “c” ]
yaml character_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character TRUE - a\n- b\n- c
jsonlite boolean_named_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical FALSE [true,false,false]
RJSONIO boolean_named_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical TRUE {\n “a”: true,\n“b”: false,\n“c”: false \n}
yaml boolean_named_vec c(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) logical logical FALSE - yes\n- no\n- no
jsonlite numeric_named_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric FALSE [1.23,4.56]
RJSONIO numeric_named_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric TRUE {\n “a”: 1.23,\n“b”: 4.56 \n}
yaml numeric_named_vec c(1.23, 4.56) c(1.23, 4.56) numeric numeric FALSE - 1.23\n- 4.56
jsonlite integer_named_vec c(1L, 5L, 3L) c(1L, 5L, 3L) integer integer FALSE [1,5,3]
RJSONIO integer_named_vec c(1L, 5L, 3L) c(1, 5, 3) integer numeric FALSE {\n “a”: 1,\n “b”: 5,\n “c”: 3 \n }
yaml integer_named_vec c(1L, 5L, 3L) c(1L, 5L, 3L) integer integer FALSE - 1\n- 5\n- 3
jsonlite character_named_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character FALSE [“a”,“b”,“c”]
RJSONIO character_named_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character TRUE {\n “a”: “a”,\n“b”: “b”,\n“c”: “c” \n}
yaml character_named_vec c(“a”, “b”, “c”) c(“a”, “b”, “c”) character character FALSE - a\n- b\n- c
jsonlite boolean_vec_with_na c(TRUE, FALSE, NA) c(TRUE, FALSE, NA) logical logical TRUE [true,false,null]
RJSONIO boolean_vec_with_na c(TRUE, FALSE, NA) list(TRUE, FALSE, NULL) logical list FALSE [ true, false, null ]
yaml boolean_vec_with_na c(TRUE, FALSE, NA) c(TRUE, FALSE, NA) logical logical TRUE - yes\n- no\n- .na
jsonlite numeric_vec_with_na c(1.23, NA) c(1.23, NA) numeric numeric TRUE [1.23,“NA”]
RJSONIO numeric_vec_with_na c(1.23, NA) list(1.23, NULL) numeric list FALSE [ 1.23, null ]
yaml numeric_vec_with_na c(1.23, NA) c(1.23, NA) numeric numeric TRUE - 1.23\n- .na.real
jsonlite integer_vec_with_na c(1L, 2L, NA) c(1L, 2L, NA) integer integer TRUE [1,2,“NA”]
RJSONIO integer_vec_with_na c(1L, 2L, NA) list(1, 2, NULL) integer list FALSE [ 1, 2, null ]
yaml integer_vec_with_na c(1L, 2L, NA) c(1L, 2L, NA) integer integer TRUE - 1\n- 2\n- .na.integer
jsonlite character_vec_with_na c(“a”, “b”, NA) c(“a”, “b”, NA) character character TRUE [“a”,“b”,null]
RJSONIO character_vec_with_na c(“a”, “b”, NA) list(“a”, “b”, NULL) character list FALSE [ “a”, “b”, null ]
yaml character_vec_with_na c(“a”, “b”, NA) c(“a”, “b”, NA) character character TRUE - a\n- b\n- .na.character
jsonlite boolean_list list(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) list matrix FALSE [[true],[false],[false]]
RJSONIO boolean_list list(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) list logical FALSE [\n true,\nfalse,\nfalse \n]
yaml boolean_list list(TRUE, FALSE, FALSE) c(TRUE, FALSE, FALSE) list logical FALSE - yes\n- no\n- no
jsonlite numeric_list list(1.23, 4.56) c(1.23, 4.56) list matrix FALSE [[1.23],[4.56]]
RJSONIO numeric_list list(1.23, 4.56) c(1.23, 4.56) list numeric FALSE [\n 1.23,\n 4.56 \n]
yaml numeric_list list(1.23, 4.56) c(1.23, 4.56) list numeric FALSE - 1.23\n- 4.56
jsonlite integer_list list(1L, 5L, 3L) c(1L, 5L, 3L) list matrix FALSE [[1],[5],[3]]
RJSONIO integer_list list(1L, 5L, 3L) c(1, 5, 3) list numeric FALSE [\n 1,\n5,\n3 \n]
yaml integer_list list(1L, 5L, 3L) c(1L, 5L, 3L) list integer FALSE - 1\n- 5\n- 3
jsonlite character_list list(“a”, “b”, “c”) c(“a”, “b”, “c”) list matrix FALSE [[“a”],[“b”],[“c”]]
RJSONIO character_list list(“a”, “b”, “c”) c(“a”, “b”, “c”) list character FALSE [\n “a”,\n“b”,\n“c” \n]
yaml character_list list(“a”, “b”, “c”) c(“a”, “b”, “c”) list character FALSE - a\n- b\n- c
jsonlite boolean_list_with_null list(TRUE, FALSE, NULL) list(TRUE, FALSE, list()) list list FALSE [[true],[false],{}]
RJSONIO boolean_list_with_null list(TRUE, FALSE, NULL) list(TRUE, FALSE, NULL) list list TRUE [\n true,\nfalse,\nnull \n]
yaml boolean_list_with_null list(TRUE, FALSE, NULL) list(TRUE, FALSE, NULL) list list TRUE - yes\n- no\n- ~
jsonlite numeric_list_with_null list(1.23, NULL) list(1.23, list()) list list FALSE [[1.23],{}]
RJSONIO numeric_list_with_null list(1.23, NULL) list(1.23, NULL) list list TRUE [\n 1.23,\nnull \n]
yaml numeric_list_with_null list(1.23, NULL) list(1.23, NULL) list list TRUE - 1.23\n- ~
jsonlite integer_list_with_null list(1L, 5L, NULL) list(1L, 5L, list()) list list FALSE [[1],[5],{}]
RJSONIO integer_list_with_null list(1L, 5L, NULL) list(1, 5, NULL) list list FALSE [\n 1,\n5,\nnull \n]
yaml integer_list_with_null list(1L, 5L, NULL) list(1L, 5L, NULL) list list TRUE - 1\n- 5\n- ~
jsonlite character_list_with_null list(“a”, “b”, NULL) list(“a”, “b”, list()) list list FALSE [[“a”],[“b”],{}]
RJSONIO character_list_with_null list(“a”, “b”, NULL) list(“a”, “b”, NULL) list list TRUE [\n “a”,\n“b”,\nnull \n]
yaml character_list_with_null list(“a”, “b”, NULL) list(“a”, “b”, NULL) list list TRUE - a\n- b\n- ~
jsonlite boolean_named_list list(a = TRUE, b = FALSE, c = FALSE) list(a = TRUE, b = FALSE, c = FALSE) list list TRUE {“a”:[true],“b”:[false],“c”:[false]}
RJSONIO boolean_named_list list(a = TRUE, b = FALSE, c = FALSE) c(TRUE, FALSE, FALSE) list logical FALSE {\n “a”: true,\n“b”: false,\n“c”: false \n}
yaml boolean_named_list list(a = TRUE, b = FALSE, c = FALSE) list(a = TRUE, b = FALSE, c = FALSE) list list TRUE a: yes\nb: no\nc: no
jsonlite numeric_named_list list(a = 1.23, b = 4.56) list(a = 1.23, b = 4.56) list list TRUE {“a”:[1.23],“b”:[4.56]}
RJSONIO numeric_named_list list(a = 1.23, b = 4.56) c(1.23, 4.56) list numeric FALSE {\n “a”: 1.23,\n“b”: 4.56 \n}
yaml numeric_named_list list(a = 1.23, b = 4.56) list(a = 1.23, b = 4.56) list list TRUE a: 1.23\nb: 4.56
jsonlite integer_named_list list(a = 1L, b = 5L, c = 3L) list(a = 1L, b = 5L, c = 3L) list list TRUE {“a”:[1],“b”:[5],“c”:[3]}
RJSONIO integer_named_list list(a = 1L, b = 5L, c = 3L) c(1, 5, 3) list numeric FALSE {\n “a”: 1,\n“b”: 5,\n“c”: 3 \n}
yaml integer_named_list list(a = 1L, b = 5L, c = 3L) list(a = 1L, b = 5L, c = 3L) list list TRUE a: 1\nb: 5\nc: 3
jsonlite character_named_list list(a = “a”, b = “b”, c = “c”) list(a = “a”, b = “b”, c = “c”) list list TRUE {“a”:[“a”],“b”:[“b”],“c”:[“c”]}
RJSONIO character_named_list list(a = “a”, b = “b”, c = “c”) c(“a”, “b”, “c”) list character FALSE {\n “a”: “a”,\n“b”: “b”,\n“c”: “c” \n}
yaml character_named_list list(a = “a”, b = “b”, c = “c”) list(a = “a”, b = “b”, c = “c”) list list TRUE a: a\nb: b\nc: c
jsonlite boolean_matrix c(TRUE, TRUE, TRUE, TRUE) c(TRUE, TRUE, TRUE, TRUE) matrix matrix TRUE [[true,true],[true,true]]
RJSONIO boolean_matrix c(TRUE, TRUE, TRUE, TRUE) list(c(TRUE, TRUE), c(TRUE, TRUE)) matrix list FALSE [ [ true, true ],\n[ true, true ] ]
yaml boolean_matrix c(TRUE, TRUE, TRUE, TRUE) c(TRUE, TRUE, TRUE, TRUE) matrix logical FALSE - yes\n- yes\n- yes\n- yes
jsonlite numeric_matrix c(1.2, 1.2, 1.2, 1.2) c(1.2, 1.2, 1.2, 1.2) matrix matrix TRUE [[1.2,1.2],[1.2,1.2]]
RJSONIO numeric_matrix c(1.2, 1.2, 1.2, 1.2) list(c(1.2, 1.2), c(1.2, 1.2)) matrix list FALSE [ [ 1.2, 1.2 ],\n[ 1.2, 1.2 ] ]
yaml numeric_matrix c(1.2, 1.2, 1.2, 1.2) c(1.2, 1.2, 1.2, 1.2) matrix numeric FALSE - 1.2\n- 1.2\n- 1.2\n- 1.2
jsonlite integer_matrix c(1L, 1L, 1L, 1L) c(1L, 1L, 1L, 1L) matrix matrix TRUE [[1,1],[1,1]]
RJSONIO integer_matrix c(1L, 1L, 1L, 1L) list(c(1, 1), c(1, 1)) matrix list FALSE [ [ 1, 1 ],\n[ 1, 1 ] ]
yaml integer_matrix c(1L, 1L, 1L, 1L) c(1L, 1L, 1L, 1L) matrix integer FALSE - 1\n- 1\n- 1\n- 1
jsonlite character_matrix c(“a”, “a”, “a”, “a”) c(“a”, “a”, “a”, “a”) matrix matrix TRUE [[“a”,“a”],[“a”,“a”]]
RJSONIO character_matrix c(“a”, “a”, “a”, “a”) list(c(“a”, “a”), c(“a”, “a”)) matrix list FALSE [ [ “a”, “a” ],\n[ “a”, “a” ] ]
yaml character_matrix c(“a”, “a”, “a”, “a”) c(“a”, “a”, “a”, “a”) matrix character FALSE - a\n- a\n- a\n- a
jsonlite data.frame list(a = 1L, b = 2.1, c = “c”) list(a = 1L, b = 2.1, c = “c”) data.frame data.frame TRUE [{“a”:1,“b”:2.1,“c”:“c”}]
RJSONIO data.frame list(a = 1L, b = 2.1, c = “c”) list(a = 1, b = 2.1, c = “c”) data.frame list FALSE {\n “a”: [ 1 ],\n“b”: [ 2.1 ],\n“c”: [ “c” ] \n}
yaml data.frame list(a = 1L, b = 2.1, c = “c”) list(a = 1L, b = 2.1, c = “c”) data.frame list FALSE a: 1\nb: 2.1\nc: c