-
Notifications
You must be signed in to change notification settings - Fork 7
/
show.R
35 lines (27 loc) · 677 Bytes
/
show.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# Load the raw comment table and the custom stop-word list.
# NOTE(review): the hard-coded setwd() ties this script to one machine. It is
# kept because the relative file names below are resolved against it; consider
# launching the script from the project directory instead.
setwd("G:\\项目\\网易云音乐评论\\文本挖掘")

# Spell out TRUE/FALSE: `T` and `F` are ordinary variables that can be
# reassigned, so the abbreviations are not safe to rely on.
csv <- read.csv("text2.csv", header = TRUE, stringsAsFactors = FALSE)

# Flatten the table read from StopWords.txt into one flat vector.
mystopwords <- unlist(read.table("StopWords.txt", stringsAsFactors = FALSE))

# Quick look at the imported data: dimensions first, then the leading rows.
# read.csv() already returns a data.frame, so no as.data.frame() conversion
# is needed before inspecting it.
dim(csv)
head(csv)
# Attach the packages used by the rest of the script.
# install.packages("tibble")
library(tibble)
library(dplyr)
library(tidyr)
library(purrr)
library(readr)
library(stringr)

# Convert the imported data.frame to a tibble and display it.
# as_tibble() replaces as.tibble(), which tibble has deprecated.
d2 <- as_tibble(csv)
d2

# Display the data grouped by userID. The grouped result is not stored, so
# `d2` and `cleaned_text` below remain ungrouped.
# NOTE(review): if per-user grouping was meant to carry over into the later
# analysis, the result has to be assigned -- confirm intent.
group_by(d2, userID)

cleaned_text <- d2
library(tidytext)

# Split the `context` column into one token per row (column `word`), keep only
# tokens ending in a lowercase ASCII letter or an apostrophe, and drop stop
# words. Both tidytext's `stop_words` and the custom `mystopwords` list loaded
# earlier in the script are excluded; the custom list was previously read but
# never applied.
# NOTE(review): the "[a-z']$" filter removes every token that does not end in
# a Latin letter or apostrophe -- confirm this is intended for this corpus.
usenet_words <- cleaned_text %>%
  unnest_tokens(word, context) %>%
  filter(
    str_detect(word, "[a-z']$"),
    !word %in% stop_words$word,
    !word %in% mystopwords
  )

# Most frequent tokens. The tokenized table is no longer overwritten with the
# untokenized data, so this count reflects the pipeline above.
usenet_words %>%
  count(word, sort = TRUE)

# Most frequent complete `context` values. This is the table the script
# originally printed; it is computed from the untokenized data because
# unnest_tokens() drops the input column by default.
cleaned_text %>%
  count(context, sort = TRUE)