{"data":[{"id":"deepseek-r1","name":"r1","display_name":"DeepSeek R1","description":"DeepSeek's flagship reasoning-focused LLM with strong performance in mathematics, coding, and logical inference, comparable to leading closed-source models.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":684.531,"modalities":{"input":["text"],"output":["text"]},"context_window":163840,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-01-20","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":14,"ids":["accounts/fireworks/models/deepseek-r1","azure_ai/deepseek-r1","deepinfra/deepseek-ai/DeepSeek-R1","deepseek-llm-r1","deepseek-r1","deepseek-r1-0120","deepseek-r1-qwen3-8b","deepseek-r1685","deepseek-reasoner","deepseek.r1-v1:0","deepseek/deepseek-r1","deepseek/deepseek-reasoner","fireworks_ai/accounts/fireworks/models/deepseek-r1","hyperbolic/deepseek-ai/DeepSeek-R1","nebius/deepseek-ai/DeepSeek-R1","openrouter/deepseek/deepseek-r1","replicate/deepseek-ai/deepseek-r1","sambanova/DeepSeek-R1","snowflake/deepseek-r1","together_ai/deepseek-ai/DeepSeek-R1","us.deepseek.r1-v1:0","vercel_ai_gateway/deepseek/deepseek-r1"],"hf_likes":13288,"hf_downloads":4020320,"hf_downloads_all_time":20204112,"hf_trending_score":20,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-qwen-32b","name":"deepseek-r1-distill-qwen-32b","display_name":"DeepSeek R1 Distill Qwen 32B","description":"A 32B Qwen-based model distilled from DeepSeek R1's reasoning capabilities, offering high-quality chain-of-thought performance at a mid-scale parameter count.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-01-29","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["@cf/deepseek-ai/deepseek-r1-distill-qwen-32b","accounts/fireworks/models/deepseek-r1-distill-qwen-32b","deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B","deepseek-llm-r1-distill-qwen-32b","deepseek-r1-distill-qwen-32b","deepseek/deepseek-r1-distill-qwen-32b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-32b","novita/deepseek/deepseek-r1-distill-qwen-32b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"],"hf_likes":1545,"hf_downloads":1046750,"hf_downloads_all_time":23929632,"hf_trending_score":3,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-qwen-14b","name":"deepseek-r1-distill-qwen-14b","display_name":"DeepSeek R1 Distill Qwen 14B","description":"A 14B Qwen-based model distilled from DeepSeek R1, balancing strong reasoning performance with moderate computational requirements.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":14,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/deepseek-r1-distill-qwen-14b","deepseek-llm-r1-distill-qwen-14b","deepseek-r1-distill-qwen-14b","deepseek/deepseek-r1-distill-qwen-14b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-14b","novita/deepseek/deepseek-r1-distill-qwen-14b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-llama-8b","name":"deepseek-r1-distill-llama-8b","display_name":"DeepSeek R1 Distill Llama 8B","description":"A compact 8B Llama-based model distilled from DeepSeek R1, delivering strong reasoning performance in a lightweight architecture.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":8,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/deepseek-r1-distill-llama-8b","deepseek-llm-r1-distill-llama-8b","deepseek-r1-distill-llama-8b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-8b","nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-qwen-1-5b","name":"deepseek-r1-distill-qwen-1-5b","display_name":"DeepSeek R1 Distill Qwen 1.5B","description":"A 1.5B Qwen-based model distilled from DeepSeek R1's reasoning chains, offering chain-of-thought capabilities in an extremely compact form factor.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":1.5,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b","deepseek-llm-r1-distill-qwen-1-5b","deepseek-r1-distill-qwen-1-5b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-528","name":"deepseek-r1-528","display_name":"DeepSeek R1 528","description":"The DeepSeek R1 0528 update, a reasoning-focused MoE LLM with improved chain-of-thought capabilities over the original R1 release.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":684.531,"modalities":{"input":["pdf","text"],"output":["text"]},"context_window":164000,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-03-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-05-28","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":12,"ids":["accounts/fireworks/models/deepseek-r1-0528","crusoe/deepseek-ai/DeepSeek-R1-0528","deepinfra/deepseek-ai/DeepSeek-R1-0528","deepseek-ai/DeepSeek-R1-0528","deepseek-llm-r1-0528","deepseek-r1-0528-maas","deepseek-r1-528","deepseek/deepseek-r1-0528","fireworks_ai/accounts/fireworks/models/deepseek-r1-0528","hyperbolic/deepseek-ai/DeepSeek-R1-0528","lambda_ai/deepseek-r1-0528","nebius/deepseek-ai/DeepSeek-R1-0528","novita/deepseek/deepseek-r1-0528","openrouter/deepseek/deepseek-r1-0528","publishers/google/models/deepseek-r1-0528-maas","vertex_ai/deepseek-ai/deepseek-r1-0528-maas","wandb/deepseek-ai/DeepSeek-R1-0528"],"hf_likes":2445,"hf_downloads":707890,"hf_downloads_all_time":6472632,"hf_trending_score":1,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-528-turbo","name":"deepseek-r1-528-turbo","display_name":"DeepSeek R1 528 Turbo","description":"A turbo-speed variant of the DeepSeek R1 0528 reasoning model, optimized for fast inference while retaining strong chain-of-thought performance.","creator":"deepseek","family":"deepseek-r1","tier":"turbo","version":"1.0","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":32768,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo","deepseek-ai/DeepSeek-R1-0528-Turbo","deepseek-r1-528-turbo"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-528b","name":"deepseek-r1-528b","display_name":"DeepSeek R1 528B","description":"A 528B-parameter variant of the DeepSeek R1 reasoning model, offering large-scale chain-of-thought reasoning capabilities.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":528,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["deepseek-r1-0528685","deepseek-r1-528b","together_ai/deepseek-ai/DeepSeek-R1-0528-tput"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-671b","name":"deepseek-r1-671b","display_name":"DeepSeek R1 671B","description":"The full-scale 671B-parameter DeepSeek R1 reasoning model using a Mixture-of-Experts architecture for advanced mathematical and logical reasoning.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":671,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["deepseek-r1-671b","lambda_ai/deepseek-r1-671b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-8b","name":"deepseek-r1-8b","display_name":"DeepSeek R1 8B","description":"An 8B-parameter distillation of DeepSeek's R1 reasoning model, providing accessible chain-of-thought capabilities in a smaller footprint.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":8,"modalities":{"input":["text"],"output":["text"]},"context_window":65536,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["deepseek-r1-8b","llamagate/deepseek-r1-8b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-basic","name":"deepseek-r1-basic","display_name":"DeepSeek R1 Basic","description":"A cost-optimized serverless deployment of DeepSeek R1 offering lower per-token pricing with reduced throughput compared to the fast endpoint.","creator":"deepseek","family":"deepseek-r1","tier":"basic","version":"1.0","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":128000,"max_output_tokens":20480,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["accounts/fireworks/models/deepseek-r1-basic","deepseek-r1-basic","fireworks_ai/accounts/fireworks/models/deepseek-r1-basic"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-qwen-7b","name":"deepseek-r1-distill-qwen-7b","display_name":"DeepSeek R1 Distill Qwen 7B","description":"A 7B Qwen-based model distilled from DeepSeek R1, providing efficient reasoning capabilities suitable for resource-constrained deployments.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":7,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["accounts/fireworks/models/deepseek-r1-distill-qwen-7b","deepseek-llm-r1-distill-qwen-7b","deepseek-r1-distill-qwen-7b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-7b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-distill-qwen3-8b","name":"deepseek-r1-distill-qwen3-8b","display_name":"DeepSeek R1 Distill Qwen3 8B","description":"An 8B model distilled from DeepSeek R1 0528's chain-of-thought into the Qwen3 8B base, achieving strong open-source reasoning benchmark performance.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":8,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["accounts/fireworks/models/deepseek-r1-0528-distill-qwen3-8b","deepseek-r1-distill-qwen3-8b","fireworks_ai/accounts/fireworks/models/deepseek-r1-0528-distill-qwen3-8b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1-turbo","name":"deepseek-r1-turbo","display_name":"DeepSeek R1 Turbo","description":"A turbo-speed variant of DeepSeek R1 optimized for faster inference while preserving the model's strong mathematical and logical reasoning abilities.","creator":"deepseek","family":"deepseek-r1","tier":"turbo","version":"1.0","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":64000,"max_output_tokens":16000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["deepinfra/deepseek-ai/DeepSeek-R1-Turbo","deepseek-r1-turbo","deepseek/deepseek-r1-turbo","novita/deepseek/deepseek-r1-turbo"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"},{"id":"deepseek-r1t2-chimera","name":"deepseek-r1t2-chimera","display_name":"DeepSeek R1T2 Chimera","description":"A 671B MoE model assembled by TNG Tech from DeepSeek R1-0528, R1, and V3-0324 checkpoints, representing the second-generation Chimera hybrid reasoning model.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"2.0","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":163840,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-07-08","earliest_deprecation_date":null,"deprecated":false,"has_pricing":false,"provider_count":0,"ids":["deepseek-r1t2-chimera","tngtech/deepseek-r1t2-chimera"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-15 08:02:11"}],"pagination":{"page_size":50,"has_next":false,"next_token":null,"total_count":15},"meta":{"updated_at":"2026-06-15","request_id":"e36356f9-6162-4e12-a146-2373dc62bf6a","execution_ms":18}}