An API-first R package for accessing 500,000+ machine learning models, embeddings, and datasets on the Hugging Face Hub. No Python required.
# install.packages("devtools")
devtools::install_github("farach/huggingfaceR")

Get a free API token from huggingface.co/settings/tokens, then configure it in R:
library(huggingfaceR)
hf_set_token("hf_your_token_here", store = TRUE)
hf_whoami()

# Sentiment analysis
hf_classify("I love using R for data science!")
#> # A tibble: 1 x 3
#> text label score
#> <chr> <chr> <dbl>
#> 1 I love using R for data science! POSITIVE 0.999
# Zero-shot classification with custom labels
hf_classify_zero_shot(
"I just bought a new laptop for coding",
labels = c("technology", "sports", "politics", "food")
)

sentences <- c(
"The cat sat on the mat",
"A feline rested on the rug",
"The dog played in the park"
)
embeddings <- hf_embed(sentences)
embeddings
#> # A tibble: 3 x 3
#> text embedding n_dims
#> <chr> <list> <int>
#> 1 The cat sat on the mat <dbl [384]> 384
#> 2 A feline rested on the rug <dbl [384]> 384
#> 3 The dog played in the park <dbl [384]> 384
hf_similarity(embeddings)
#> # A tibble: 3 x 3
#> text_1 text_2 similarity
#> <chr> <chr> <dbl>
#> 1 The cat sat on the mat A feline rested on the rug 0.89
#> 2 The cat sat on the mat The dog played in the park 0.45
#> 3 A feline rested on ... The dog played in the park 0.39

hf_chat("What is the tidyverse?")
# With a system prompt
hf_chat(
"Explain logistic regression in two sentences.",
system = "You are a statistics instructor. Use plain language."
)
# Multi-turn conversation
convo <- hf_conversation(system = "You are a helpful R tutor.")
convo <- chat(convo, "How do I read a CSV file?")
convo <- chat(convo, "What about Excel files?")

hf_generate("Once upon a time in a land far away,", max_new_tokens = 100)
hf_fill_mask("The capital of France is [MASK].")
#> # A tibble: 5 x 4
#> text token score filled
#> <chr> <chr> <dbl> <chr>
#> 1 The capital of France is [MASK]. paris 0.88 The capital of France is paris.
#> 2 The capital of France is [MASK]. lyon 0.03 The capital of France is lyon.
#> ...

All functions accept character vectors and return tibbles.
library(dplyr)
library(tidyr)
reviews <- tibble(
id = 1:3,
text = c(
"This product is amazing!",
"Terrible experience.",
"It's okay, nothing special."
)
)
reviews |>
mutate(sentiment = hf_classify(text)) |>
unnest(sentiment, names_sep = "_") |>
select(id, text, sentiment_label, sentiment_score)

Use embeddings as features in machine learning workflows:
library(tidymodels)
rec <- recipe(sentiment ~ text, data = train_data) |>
step_hf_embed(text)
wf <- workflow() |>
add_recipe(rec) |>
add_model(logistic_reg()) |>
fit(data = train_data)

Semantic search and document clustering:
docs |>
hf_embed_text(text) |>
hf_nearest_neighbors("machine learning", k = 5)
docs |>
hf_embed_text(text) |>
hf_cluster_texts(k = 3) |>
hf_extract_topics(text_col = "text", k = 3)

# Search models
hf_search_models(task = "text-classification", limit = 10)
# Load datasets into tibbles (no Python needed)
imdb <- hf_load_dataset("imdb", split = "train", limit = 1000)

- vignette("getting-started") – setup and first examples
- vignette("text-classification") – sentiment analysis and zero-shot labeling
- vignette("embeddings-and-similarity") – semantic search, clustering, visualization
- vignette("llm-chat-and-generation") – conversations and text generation
- vignette("hub-datasets-and-modeling") – Hub discovery and tidymodels pipelines
- vignette("anthropic-economic-index") – AI productivity research with the Anthropic Economic Index
MIT