Read a text file or character list and replace non-ASCII (Unicode) characters, i.e. those matching [^\x00-\x7F],
with an equivalent ASCII string or a specified replacement.
Review table ?dat_unicode_tables_sel_byhand_e
for the ASCII replacements.
Not every Unicode character has a natural ASCII equivalent; any character without one is replaced by the string given in unicode_generic_replacement.
e_text_unicode_to_ascii(
fn_in = NULL,
fn_out = NULL,
text_in = NULL,
unicode_generic_replacement = "XunicodeX",
sw_print_line_text = FALSE
)
fn_in: input text filename.
fn_out: output text filename. If NULL, then this is assigned fn_in.
text_in: input character list.
unicode_generic_replacement: a string used to replace Unicode characters that have no matching ASCII replacement.
sw_print_line_text: FALSE to print only the line/index number; TRUE to print the line/index number and the text on the line.
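NULL, invisibly. (Note: the examples below assign and reuse the result when text_in is supplied, which suggests the converted text is returned in that case; confirm against the implementation.)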
Use either fn_in and fn_out (for files) or text_in (for a character list), but not both.
if (FALSE) {
# # file with lots of unicode to replace
# e_text_unicode_to_ascii(
# fn_in = "./data-raw/unicode/text_in_unicode.csv"
# , fn_out = "./data-raw/unicode/text_out_unicode.csv"
# , text_in = NULL
# , unicode_generic_replacement = "XxXunicodeXxX"
# , sw_print_line_text = FALSE
# )
#
# # list with lots of unicode to replace
# text_no_unicode <-
# e_text_unicode_to_ascii(
# fn_in = NULL
# , fn_out = NULL
# , text_in = erikmisc::dat_unicode_tables_sel_byhand_e$Glyph
# , unicode_generic_replacement = "XxXunicodeXxX"
# , sw_print_line_text = FALSE
# )
#
# # no unicode in text_in list
# text_no_unicode <-
# e_text_unicode_to_ascii(
# fn_in = NULL
# , fn_out = NULL
# , text_in = c("a", "b", "c")
# , unicode_generic_replacement = "XxXunicodeXxX"
# , sw_print_line_text = FALSE
# )
#
# # remove all unicode from all variables
# dat_all <-
# dat_all %>%
# dplyr::mutate(
# dplyr::across(
# .cols = tidyselect::everything()
# , .fns = ~ e_text_unicode_to_ascii(text_in = .x)
# )
# )
}