diff --git a/R/MetaNLP.R b/R/MetaNLP.R index 7c173d3..809f35d 100644 --- a/R/MetaNLP.R +++ b/R/MetaNLP.R @@ -113,8 +113,6 @@ MetaNLP <- function(file, (`[[`)(c("x")) |> # lower case tolower() |> - # lemmatization of the words - textstem::lemmatize_strings(dictionary = lexicon) |> tm::VectorSource() |> # create corpus object tm::Corpus() |> @@ -122,6 +120,8 @@ MetaNLP <- function(file, tm::tm_map(tm::content_transformer(replaceSpecialChars), language = language) |> # strip white space tm::tm_map(tm::stripWhitespace) |> + # lemmatization of the words + tm::tm_map(textstem::lemmatize_strings, dictionary = lexicon) |> # only use word stems tm::tm_map(tm::stemDocument, language = language) |> # create matrix @@ -133,7 +133,7 @@ MetaNLP <- function(file, # only choose word stems that appear at least a pre-specified number of times temp <- temp[, colSums(temp) >= bounds[1] & colSums(temp) <= bounds[2]] - +# # order by column name index_vec <- order(names(temp)) temp |> diff --git a/tests/testthat/test_constructor.R b/tests/testthat/test_constructor.R index fdeea99..b493e31 100644 --- a/tests/testthat/test_constructor.R +++ b/tests/testthat/test_constructor.R @@ -110,6 +110,7 @@ test_that("constructor works", { MetaNLP(source_path_ru, bounds = c(1, Inf), language = "russian", encoding = "UTF-8") ) + }) test_that("print methods work", { diff --git a/tests/testthat/test_deletion.R b/tests/testthat/test_deletion.R index c053b2b..7596600 100644 --- a/tests/testthat/test_deletion.R +++ b/tests/testthat/test_deletion.R @@ -45,8 +45,7 @@ test_that("Special characters can be replaces", { # load french data set source_path_fr <- test_path("data", "french_data.csv") - obj_fr <- MetaNLP(source_path_fr, bounds = c(1, Inf), language = "french", - stringsAsFactors=FALSE, fileEncoding = "latin1") + obj_fr <- MetaNLP(source_path_fr, bounds = c(1, Inf), language = "french") # add a column name that contains all possible special characters obj_fr@data_frame <- data.frame(obj_fr@data_frame,