What’s our goal here? Some of us might want to one day work in industry, where words like JSON, MySQL (or NoSQL), and Machine Learning are commonplace. In order to add to our overflowing skillsets (nunchuck skills, bow hunting skills, computer hacking skills…) we have set ourselves a quest: analyze the Yelp Challenge data.
By the time we’re finished, we hope to have a better understanding of what research and analysis (that is, data science) looks like in the world of (not-)for-profit corporations.
If you want to abandon MongoDB and just read in the JSON directly:
## importing Yelp data using jsonlite
setwd("~/Desktop") # relative paths below resolve against this directory
library(jsonlite) # load the jsonlite library (provides stream_in)

# Import the review data; flatten = TRUE prevents nested columns.
yelp_reviews <- stream_in(
  file("yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_review.json"),
  flatten = TRUE
)

# Import the business data the same way.
biz <- stream_in(
  file("yelp_dataset_challenge_academic_dataset/yelp_academic_dataset_business.json"),
  flatten = TRUE
)
# `id` holds a single value from biz$business_id, the unique identifier of a
# business in the Yelp database.
id <- "e_U_FnpdKVgNb4mUN2cU_Q"

# Select the rows whose business_id matches `id` and show only the name and
# city of that business.
is_target <- biz$business_id == id
biz[is_target, c("name", "city")]
# Handle on the `reviews` collection of the `yelp` database.
review.collection <- mongo(collection = "reviews", db = "yelp")

# Sample size: 4.5% of the documents in the collection, rounded to a whole
# number.
sampleSize <- round(0.045 * review.collection$count())

# List the indexes currently defined on the collection.
review.collection$index()
## v _id name ns
## 1 1 1 _id_ yelp.reviews
# m$aggregate('[{"$group":{"_id":"$carrier", "count": {"$sum":1}, "average":{"$avg":"$distance"}}}]')
# Draw a random sample of `sampleSize` reviews, keeping only the fields used
# downstream (_id, text, business_id, stars, date).
#
# `$sample` is placed first in the pipeline: MongoDB documents a faster
# pseudo-random-cursor path when `$sample` is the first stage and the requested
# size is under 5% of the collection; otherwise it has to read and randomly
# sort every document coming out of the preceding stage. Projecting after
# sampling yields the same fields for the sampled documents.
#
# The size is rendered with scientific = FALSE so that a round value such as
# 200000 is written into the JSON as "200000" rather than "2e+05".
aSample <- review.collection$aggregate(
paste0('[
{ "$sample" : { "size" : ', format(sampleSize, scientific = FALSE), ' } },
{ "$project" : { "_id" : 1, "text" : 1 , "business_id" : 1, "stars" : 1, "date" : 1} }
]'))
##
Found 1000 records...
Found 2000 records...
Found 3000 records...
Found 4000 records...
Found 5000 records...
Found 6000 records...
Found 7000 records...
Found 8000 records...
Found 9000 records...
Found 10000 records...
Found 11000 records...
Found 12000 records...
Found 13000 records...
Found 14000 records...
Found 15000 records...
Found 16000 records...
Found 17000 records...
Found 18000 records...
Found 19000 records...
Found 20000 records...
Found 21000 records...
Found 22000 records...
Found 23000 records...
Found 24000 records...
Found 25000 records...
Found 26000 records...
Found 27000 records...
Found 28000 records...
Found 29000 records...
Found 30000 records...
Found 31000 records...
Found 32000 records...
Found 33000 records...
Found 34000 records...
Found 35000 records...
Found 36000 records...
Found 37000 records...
Found 38000 records...
Found 39000 records...
Found 40000 records...
Found 41000 records...
Found 42000 records...
Found 43000 records...
Found 44000 records...
Found 45000 records...
Found 46000 records...
Found 47000 records...
Found 48000 records...
Found 49000 records...
Found 50000 records...
Found 51000 records...
Found 52000 records...
Found 53000 records...
Found 54000 records...
Found 55000 records...
Found 56000 records...
Found 57000 records...
Found 58000 records...
Found 59000 records...
Found 60000 records...
Found 61000 records...
Found 62000 records...
Found 63000 records...
Found 64000 records...
Found 65000 records...
Found 66000 records...
Found 67000 records...
Found 68000 records...
Found 69000 records...
Found 70000 records...
Found 70617 records...
Imported 70617 records. Simplifying into dataframe...
# Cache the sampled reviews on disk, read them back, and report how much
# memory the object occupies.
sample_cache <- "currentReviewSample.RData"
save(aSample, file = sample_cache)
load(file = sample_cache)
format(object.size(aSample), units = "MB")
## [1] "56.7 Mb"
library(stm)
# textProcessor(documents, metadata=NULL,
# lowercase=TRUE, removestopwords=TRUE, removenumbers=TRUE,
# removepunctuation=TRUE, stem=TRUE, wordLengths=c(3,Inf),
# sparselevel=1, language="en",
# verbose=TRUE, onlycharacter= FALSE, striphtml=FALSE,
# customstopwords=NULL, onlytxtfiles=TRUE)
# Pre-process the review text with stm's textProcessor (see the commented
# signature above for its defaults). `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
processedThing <- textProcessor(aSample$text, sparselevel = 1, verbose = TRUE)
## Building corpus...
## Converting to Lower Case...
## Removing stopwords...
## Removing numbers...
## Removing punctuation...
## Stemming...
## Creating Output...
# Prepare the processed corpus for modelling; as the log below shows, this
# removes low-frequency terms and any documents left with no words.
prepped <- prepDocuments(
  documents = processedThing$documents,
  vocab = processedThing$vocab,
  meta = processedThing$meta
)
## Removing 42073 of 70421 terms (42073 of 3509486 tokens) due to frequency
## Removing 9 Documents with No Words
## Your corpus now has 70595 documents, 28348 terms and 3467413 tokens.
# Fit a 10-topic structural topic model. The model is fit on the output of
# prepDocuments() (`prepped`) rather than the raw textProcessor() result, so
# the term/document filtering performed there is actually applied and the
# documents and vocabulary stay consistent with each other.
aModel <- stm(prepped$documents, prepped$vocab, K = 10)
## Beginning Initialization.
## ....................................................................................................
## Completed E-Step (47 seconds).
## Completed M-Step.
## Completing Iteration 1 (approx. per word bound = -7.438)
## ....................................................................................................
## Completed E-Step (40 seconds).
## Completed M-Step.
## Completing Iteration 2 (approx. per word bound = -7.434, relative change = 5.904e-04)
## ....................................................................................................
## Completed E-Step (39 seconds).
## Completed M-Step.
## Completing Iteration 3 (approx. per word bound = -7.429, relative change = 6.423e-04)
## ....................................................................................................
## Completed E-Step (39 seconds).
## Completed M-Step.
## Completing Iteration 4 (approx. per word bound = -7.424, relative change = 6.067e-04)
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 5 (approx. per word bound = -7.420, relative change = 5.283e-04)
## Topic 1: place, like, just, one, good
## Topic 2: great, place, drink, friend, bar
## Topic 3: room, stay, show, vega, hotel
## Topic 4: get, look, store, need, shop
## Topic 5: pizza, salad, delici, perfect, restaur
## Topic 6: wait, ask, back, minut, said
## Topic 7: good, also, best, price, like
## Topic 8: year, staff, time, will, work
## Topic 9: order, food, good, chicken, fri
## Topic 10: food, place, time, tri, good
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 6 (approx. per word bound = -7.417, relative change = 4.873e-04)
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 7 (approx. per word bound = -7.413, relative change = 4.690e-04)
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 8 (approx. per word bound = -7.410, relative change = 4.575e-04)
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 9 (approx. per word bound = -7.407, relative change = 4.434e-04)
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 10 (approx. per word bound = -7.404, relative change = 4.305e-04)
## Topic 1: place, like, just, one, good
## Topic 2: great, place, drink, friend, good
## Topic 3: room, stay, show, hotel, vega
## Topic 4: get, look, store, need, shop
## Topic 5: pizza, salad, delici, steak, dessert
## Topic 6: wait, ask, back, said, minut
## Topic 7: also, good, best, price, vega
## Topic 8: year, staff, time, day, will
## Topic 9: order, good, food, chicken, fri
## Topic 10: food, place, time, tri, good
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 11 (approx. per word bound = -7.400, relative change = 4.183e-04)
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 12 (approx. per word bound = -7.397, relative change = 4.044e-04)
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 13 (approx. per word bound = -7.395, relative change = 3.890e-04)
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 14 (approx. per word bound = -7.392, relative change = 3.671e-04)
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 15 (approx. per word bound = -7.389, relative change = 3.387e-04)
## Topic 1: like, place, just, one, littl
## Topic 2: great, place, friend, drink, love
## Topic 3: room, stay, show, hotel, vega
## Topic 4: look, get, store, shop, need
## Topic 5: pizza, salad, steak, delici, dessert
## Topic 6: wait, ask, back, said, minut
## Topic 7: also, best, vega, price, amaz
## Topic 8: year, staff, time, work, day
## Topic 9: order, good, chicken, fri, food
## Topic 10: food, place, time, tri, good
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 16 (approx. per word bound = -7.387, relative change = 3.084e-04)
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 17 (approx. per word bound = -7.385, relative change = 2.790e-04)
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 18 (approx. per word bound = -7.383, relative change = 2.525e-04)
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 19 (approx. per word bound = -7.381, relative change = 2.288e-04)
## ....................................................................................................
## Completed E-Step (37 seconds).
## Completed M-Step.
## Completing Iteration 20 (approx. per word bound = -7.380, relative change = 2.083e-04)
## Topic 1: like, place, just, one, littl
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: look, get, store, shop, need
## Topic 5: pizza, salad, steak, dessert, delici
## Topic 6: wait, ask, back, got, said
## Topic 7: also, best, vega, amaz, price
## Topic 8: year, time, staff, work, day
## Topic 9: order, chicken, good, fri, burger
## Topic 10: food, place, time, good, tri
## ....................................................................................................
## Completed E-Step (38 seconds).
## Completed M-Step.
## Completing Iteration 21 (approx. per word bound = -7.379, relative change = 1.910e-04)
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 22 (approx. per word bound = -7.377, relative change = 1.756e-04)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 23 (approx. per word bound = -7.376, relative change = 1.620e-04)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 24 (approx. per word bound = -7.375, relative change = 1.500e-04)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 25 (approx. per word bound = -7.374, relative change = 1.395e-04)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: store, look, get, shop, need
## Topic 5: pizza, dessert, salad, steak, delici
## Topic 6: wait, ask, back, got, servic
## Topic 7: best, also, vega, amaz, top
## Topic 8: year, time, work, staff, will
## Topic 9: order, chicken, fri, good, burger
## Topic 10: food, place, good, time, tri
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 26 (approx. per word bound = -7.373, relative change = 1.300e-04)
## ....................................................................................................
## Completed E-Step (381 seconds).
## Completed M-Step.
## Completing Iteration 27 (approx. per word bound = -7.372, relative change = 1.217e-04)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 28 (approx. per word bound = -7.371, relative change = 1.143e-04)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 29 (approx. per word bound = -7.370, relative change = 1.076e-04)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 30 (approx. per word bound = -7.370, relative change = 1.014e-04)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: store, look, shop, get, need
## Topic 5: pizza, dessert, salad, steak, delici
## Topic 6: wait, ask, back, got, order
## Topic 7: best, also, vega, amaz, top
## Topic 8: year, time, work, staff, will
## Topic 9: order, chicken, fri, burger, good
## Topic 10: food, place, good, time, tri
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 31 (approx. per word bound = -7.369, relative change = 9.572e-05)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 32 (approx. per word bound = -7.368, relative change = 9.067e-05)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 33 (approx. per word bound = -7.368, relative change = 8.616e-05)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 34 (approx. per word bound = -7.367, relative change = 8.203e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 35 (approx. per word bound = -7.366, relative change = 7.812e-05)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, delici
## Topic 6: wait, ask, back, got, order
## Topic 7: best, also, amaz, vega, top
## Topic 8: year, time, work, staff, will
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, time, tri
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 36 (approx. per word bound = -7.366, relative change = 7.470e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 37 (approx. per word bound = -7.365, relative change = 7.178e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 38 (approx. per word bound = -7.365, relative change = 6.929e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 39 (approx. per word bound = -7.364, relative change = 6.701e-05)
## ....................................................................................................
## Completed E-Step (33 seconds).
## Completed M-Step.
## Completing Iteration 40 (approx. per word bound = -7.364, relative change = 6.486e-05)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, delici
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, top
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (36 seconds).
## Completed M-Step.
## Completing Iteration 41 (approx. per word bound = -7.363, relative change = 6.265e-05)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 42 (approx. per word bound = -7.363, relative change = 6.037e-05)
## ....................................................................................................
## Completed E-Step (34 seconds).
## Completed M-Step.
## Completing Iteration 43 (approx. per word bound = -7.363, relative change = 5.805e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 44 (approx. per word bound = -7.362, relative change = 5.570e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 45 (approx. per word bound = -7.362, relative change = 5.328e-05)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, love, friend, drink
## Topic 3: room, stay, show, hotel, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, delici
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, top
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 46 (approx. per word bound = -7.361, relative change = 5.076e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 47 (approx. per word bound = -7.361, relative change = 4.818e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 48 (approx. per word bound = -7.361, relative change = 4.556e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 49 (approx. per word bound = -7.360, relative change = 4.310e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 50 (approx. per word bound = -7.360, relative change = 4.084e-05)
## Topic 1: like, just, place, one, littl
## Topic 2: great, place, love, friend, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, cream
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, top
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 51 (approx. per word bound = -7.360, relative change = 3.879e-05)
## ....................................................................................................
## Completed E-Step (31 seconds).
## Completed M-Step.
## Completing Iteration 52 (approx. per word bound = -7.360, relative change = 3.709e-05)
## ....................................................................................................
## Completed E-Step (32 seconds).
## Completed M-Step.
## Completing Iteration 53 (approx. per word bound = -7.359, relative change = 3.568e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 54 (approx. per word bound = -7.359, relative change = 3.460e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 55 (approx. per word bound = -7.359, relative change = 3.361e-05)
## Topic 1: like, just, place, one, can
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, cream
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, top
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 56 (approx. per word bound = -7.359, relative change = 3.268e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 57 (approx. per word bound = -7.358, relative change = 3.186e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 58 (approx. per word bound = -7.358, relative change = 3.126e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 59 (approx. per word bound = -7.358, relative change = 3.076e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 60 (approx. per word bound = -7.358, relative change = 3.036e-05)
## Topic 1: like, just, place, one, can
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, salad, steak, cream
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, worth
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 61 (approx. per word bound = -7.357, relative change = 2.986e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 62 (approx. per word bound = -7.357, relative change = 2.944e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 63 (approx. per word bound = -7.357, relative change = 2.894e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 64 (approx. per word bound = -7.357, relative change = 2.837e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 65 (approx. per word bound = -7.357, relative change = 2.752e-05)
## Topic 1: like, just, place, one, can
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, get, look, car
## Topic 5: pizza, dessert, steak, salad, cream
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, worth
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 66 (approx. per word bound = -7.356, relative change = 2.664e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 67 (approx. per word bound = -7.356, relative change = 2.580e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 68 (approx. per word bound = -7.356, relative change = 2.500e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 69 (approx. per word bound = -7.356, relative change = 2.428e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 70 (approx. per word bound = -7.356, relative change = 2.368e-05)
## Topic 1: like, just, place, one, can
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, car, get, look
## Topic 5: pizza, dessert, steak, cream, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, worth
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 71 (approx. per word bound = -7.355, relative change = 2.323e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 72 (approx. per word bound = -7.355, relative change = 2.291e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 73 (approx. per word bound = -7.355, relative change = 2.261e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 74 (approx. per word bound = -7.355, relative change = 2.228e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 75 (approx. per word bound = -7.355, relative change = 2.181e-05)
## Topic 1: like, just, place, can, one
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, car, price, look
## Topic 5: pizza, dessert, steak, cream, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, worth
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 76 (approx. per word bound = -7.355, relative change = 2.118e-05)
## ....................................................................................................
## Completed E-Step (28 seconds).
## Completed M-Step.
## Completing Iteration 77 (approx. per word bound = -7.354, relative change = 2.042e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 78 (approx. per word bound = -7.354, relative change = 1.969e-05)
## ....................................................................................................
## Completed E-Step (28 seconds).
## Completed M-Step.
## Completing Iteration 79 (approx. per word bound = -7.354, relative change = 1.895e-05)
## ....................................................................................................
## Completed E-Step (28 seconds).
## Completed M-Step.
## Completing Iteration 80 (approx. per word bound = -7.354, relative change = 1.822e-05)
## Topic 1: like, just, place, can, one
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, car, price, look
## Topic 5: pizza, dessert, cream, steak, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, well
## Topic 8: year, time, work, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (28 seconds).
## Completed M-Step.
## Completing Iteration 81 (approx. per word bound = -7.354, relative change = 1.761e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 82 (approx. per word bound = -7.354, relative change = 1.702e-05)
## ....................................................................................................
## Completed E-Step (28 seconds).
## Completed M-Step.
## Completing Iteration 83 (approx. per word bound = -7.354, relative change = 1.656e-05)
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 84 (approx. per word bound = -7.354, relative change = 1.624e-05)
## ....................................................................................................
## Completed E-Step (30 seconds).
## Completed M-Step.
## Completing Iteration 85 (approx. per word bound = -7.353, relative change = 1.598e-05)
## Topic 1: like, just, place, can, one
## Topic 2: great, place, friend, love, drink
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, car, price, look
## Topic 5: pizza, dessert, cream, steak, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, well
## Topic 8: work, time, year, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, tri, time
## ....................................................................................................
## Completed E-Step (29 seconds).
## Completed M-Step.
## Completing Iteration 86 (approx. per word bound = -7.353, relative change = 1.572e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 87 (approx. per word bound = -7.353, relative change = 1.537e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 88 (approx. per word bound = -7.353, relative change = 1.503e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 89 (approx. per word bound = -7.353, relative change = 1.454e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 90 (approx. per word bound = -7.353, relative change = 1.388e-05)
## Topic 1: like, just, place, can, get
## Topic 2: great, place, friend, drink, love
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, price, car, look
## Topic 5: pizza, dessert, cream, steak, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, well
## Topic 8: work, time, year, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, time, tri
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 91 (approx. per word bound = -7.353, relative change = 1.339e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 92 (approx. per word bound = -7.353, relative change = 1.304e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 93 (approx. per word bound = -7.353, relative change = 1.276e-05)
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 94 (approx. per word bound = -7.353, relative change = 1.249e-05)
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 95 (approx. per word bound = -7.352, relative change = 1.217e-05)
## Topic 1: like, just, place, can, get
## Topic 2: great, place, friend, drink, bar
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, price, car, look
## Topic 5: pizza, dessert, cream, steak, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, well
## Topic 8: work, time, year, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, servic, time
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 96 (approx. per word bound = -7.352, relative change = 1.176e-05)
## ....................................................................................................
## Completed E-Step (27 seconds).
## Completed M-Step.
## Completing Iteration 97 (approx. per word bound = -7.352, relative change = 1.130e-05)
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 98 (approx. per word bound = -7.352, relative change = 1.091e-05)
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 99 (approx. per word bound = -7.352, relative change = 1.054e-05)
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Completing Iteration 100 (approx. per word bound = -7.352, relative change = 1.007e-05)
## Topic 1: like, just, place, get, can
## Topic 2: great, place, friend, drink, bar
## Topic 3: room, stay, hotel, show, vega
## Topic 4: store, shop, price, car, look
## Topic 5: pizza, dessert, cream, steak, salad
## Topic 6: wait, ask, back, got, order
## Topic 7: best, amaz, also, vega, well
## Topic 8: work, time, year, will, staff
## Topic 9: order, chicken, fri, burger, sandwich
## Topic 10: food, good, place, servic, time
## ....................................................................................................
## Completed E-Step (26 seconds).
## Completed M-Step.
## Model Converged
# Persist the fitted topic model to disk so the slow fit logged above does not
# have to be repeated in later sessions.
save(aModel, file = "review_sample_topic_model.RData")
# Restore the saved object(s) into the current environment under their
# original names (here `aModel`). The stray empty leading argument of the
# previous call has been dropped; `file` is the only argument needed.
load(file = "review_sample_topic_model.RData")
# Print the top words for every topic (Highest Prob, FREX, Lift, Score).
labelTopics(aModel)
## Topic 1 Top Words:
## Highest Prob: like, just, place, get, can, one, realli
## FREX: donut, coffe, sometim, east, might, perhap, chain
## Lift: bosa, abolut, aboslut, accessbas, accommid, aeroport, afaik
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
## Topic 2 Top Words:
## Highest Prob: great, place, friend, drink, bar, love, night
## FREX: beer, patio, tap, brew, draft, und, irish
## Lift: aaaall, aan, aangenaam, abbay, abdh, abendessen, abendstunden
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
## Topic 3 Top Words:
## Highest Prob: room, stay, hotel, show, vega, get, see
## FREX: room, hotel, pool, club, casino, bathroom, danc
## Lift: luxor, aaaaaaaad, aahh, aall, aawwweeessoommeee, abba, abccom
## Score: abond, accéder, acompañada, affamé, affiché, âgés, agrad
## Topic 4 Top Words:
## Highest Prob: store, shop, price, car, look, need, get
## FREX: store, buy, hair, massag, cloth, brand, groceri
## Lift: abarth, acura, alamo, alpaca, ambush, ammunit, anklet
## Score: agaç, amoureux, angenehmeren, angeordnet, ärgerlich, aubain, aufgeteilt
## Topic 5 Top Words:
## Highest Prob: pizza, dessert, cream, steak, salad, chocol, ice
## FREX: pasta, crust, pie, cupcak, oliv, filet, scallop
## Lift: balsam, clam, était, foi, marinara, mussel, pie
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
## Topic 6 Top Words:
## Highest Prob: wait, ask, back, got, order, time, get
## FREX: rude, apolog, horribl, worst, manag, said, upset
## Lift: jetblu, paramed, rude, upset, aaaaaalright, aahhh, abajo
## Score: abimé, abond, accéder, accompagn, accompagné, accueilli, accueillir
## Topic 7 Top Words:
## Highest Prob: best, amaz, also, vega, love, well, favorit
## FREX: buffet, dim, varieti, yogurt, pricey, smoothi, froyo
## Lift: blini, blynk, bsbc, cfu, dlite, ducass, fatfre
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
## Topic 8 Top Words:
## Highest Prob: work, time, year, will, staff, day, help
## FREX: offic, doctor, pet, vet, yoga, dentist, instructor
## Lift: abdomin, abdul, accutemp, achill, acp, administ, adopt
## Score: abond, accéder, accompagn, accompagné, accueillir, achet, achèt
## Topic 9 Top Words:
## Highest Prob: order, chicken, fri, burger, sandwich, sauc, flavor
## FREX: chicken, fri, burger, taco, egg, pork, bbq
## Lift: abodaba, adobo, alfalfa, atkin, authentico, bab, bap
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
## Topic 10 Top Words:
## Highest Prob: food, good, place, servic, time, tri, restaur
## FREX: sushi, ayc, boba, roll, thai, food, eel
## Lift: amaebi, cartman, chanpen, chinees, cyclo, eew, filipina
## Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
# Plot through the `plot()` generic rather than calling the S3 method
# `plot.STM` directly: dispatch on the model's class selects the same method,
# and it keeps working when the method is registered but not exported.
# NOTE(review): assumes `aModel` carries class "STM" -- confirm where it is fit.
# "perspectives" plot restricted to topics 1 and 10.
plot(aModel, type = "perspectives", topics = c(1, 10))
# "labels" plot across the topics of the model.
plot(aModel, type = "labels")
# Print the per-topic marginal word labels (Highest Prob, FREX, Lift, Score).
sageLabels(aModel)
## Topic 1:
## Marginal Highest Prob: like, just, place, get, can, one, realli
## Marginal FREX: donut, coffe, sometim, east, might, perhap, chain
## Marginal Lift: bosa, abolut, aboslut, accessbas, accommid, aeroport, afaik
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 2:
## Marginal Highest Prob: great, place, friend, drink, bar, love, night
## Marginal FREX: beer, patio, tap, brew, draft, und, irish
## Marginal Lift: aaaall, aan, aangenaam, abbay, abdh, abendessen, abendstunden
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 3:
## Marginal Highest Prob: room, stay, hotel, show, vega, get, see
## Marginal FREX: room, hotel, pool, club, casino, bathroom, danc
## Marginal Lift: luxor, aaaaaaaad, aahh, aall, aawwweeessoommeee, abba, abccom
## Marginal Score: abond, accéder, acompañada, affamé, affiché, âgés, agrad
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 4:
## Marginal Highest Prob: store, shop, price, car, look, need, get
## Marginal FREX: store, buy, hair, massag, cloth, brand, groceri
## Marginal Lift: abarth, acura, alamo, alpaca, ambush, ammunit, anklet
## Marginal Score: agaç, amoureux, angenehmeren, angeordnet, ärgerlich, aubain, aufgeteilt
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 5:
## Marginal Highest Prob: pizza, dessert, cream, steak, salad, chocol, ice
## Marginal FREX: pasta, crust, pie, cupcak, oliv, filet, scallop
## Marginal Lift: balsam, clam, était, foi, marinara, mussel, pie
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 6:
## Marginal Highest Prob: wait, ask, back, got, order, time, get
## Marginal FREX: rude, apolog, horribl, worst, manag, said, upset
## Marginal Lift: jetblu, paramed, rude, upset, aaaaaalright, aahhh, abajo
## Marginal Score: abimé, abond, accéder, accompagn, accompagné, accueilli, accueillir
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 7:
## Marginal Highest Prob: best, amaz, also, vega, love, well, favorit
## Marginal FREX: buffet, dim, varieti, yogurt, pricey, smoothi, froyo
## Marginal Lift: blini, blynk, bsbc, cfu, dlite, ducass, fatfre
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 8:
## Marginal Highest Prob: work, time, year, will, staff, day, help
## Marginal FREX: offic, doctor, pet, vet, yoga, dentist, instructor
## Marginal Lift: abdomin, abdul, accutemp, achill, acp, administ, adopt
## Marginal Score: abond, accéder, accompagn, accompagné, accueillir, achet, achèt
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 9:
## Marginal Highest Prob: order, chicken, fri, burger, sandwich, sauc, flavor
## Marginal FREX: chicken, fri, burger, taco, egg, pork, bbq
## Marginal Lift: abodaba, adobo, alfalfa, atkin, authentico, bab, bap
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
## Topic 10:
## Marginal Highest Prob: food, good, place, servic, time, tri, restaur
## Marginal FREX: sushi, ayc, boba, roll, thai, food, eel
## Marginal Lift: amaebi, cartman, chanpen, chinees, cyclo, eew, filipina
## Marginal Score: abimé, abond, abondant, accéder, accompagn, accompagné, accueilli
##
## Topic Kappa:
## Kappa with Baseline:
##
- [ ] List of every category used, so we can pull business ids.
- [ ] How to get records that match on a list of ids?