{"data":[{"id":"zhipu-glm-5-1","name":"glm-5-1","display_name":"GLM-5.1","description":"Z AI's next-generation flagship agentic LLM with significantly stronger coding capabilities, vision support, and file input, achieving top performance on SWE-Bench.","creator":"zhipu","family":"glm_moe_dsa","tier":"","version":"5-1","type":"language","size_in_bn":753.864,"modalities":{"input":["image","pdf","text"],"output":["text"]},"context_window":202800,"max_output_tokens":64000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2026-04-07","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["accounts/fireworks/models/glm-5p1","fireworks_ai/accounts/fireworks/models/glm-5p1","fireworks_ai/glm-5p1","glm-5-1","glm-5-1-non-reasoning","huggingface-llm-glm-5-1-fp8","z-ai/glm-5.1","zai-org/glm-5.1","zai-org/GLM-5.1","zai/glm-5.1","zhipu-glm-5-1"],"hf_likes":1449,"hf_downloads":147738,"hf_downloads_all_time":147738,"hf_trending_score":214,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"zhipu-glm-5-1","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.98,"max_input_per_1m":1.4,"min_output_per_1m":3.08,"max_output_per_1m":4.4,"min_cache_read_per_1m":0.182,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":4},"providers":[],"regions":[],"region_info":{}}},{"id":"moonshot-kimi-k2-5","name":"kimi-k2-5","display_name":"Kimi 
K2.5","description":"An updated iteration of Kimi K2 with enhanced reasoning, vision, and tool-use capabilities, supporting implicit caching for efficient inference.","creator":"moonshot","family":"kimi_k25","tier":"","version":"k2-5","type":"language","size_in_bn":1058.589,"modalities":{"input":["image","text","video"],"output":["text"]},"context_window":262144,"max_output_tokens":98304,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2026-01-27","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":12,"ids":["@cf/moonshotai/kimi-k2.5","accounts/fireworks/models/kimi-k2p5","azure_ai/kimi-k2.5","baseten/moonshotai/Kimi-K2.5","bedrock/ap-northeast-1/moonshotai.kimi-k2.5","bedrock/ap-south-1/moonshotai.kimi-k2.5","bedrock/ap-southeast-3/moonshotai.kimi-k2.5","bedrock/eu-north-1/moonshotai.kimi-k2.5","bedrock/moonshotai.kimi-k2.5","bedrock/sa-east-1/moonshotai.kimi-k2.5","bedrock/us-east-1/moonshotai.kimi-k2.5","bedrock/us-east-2/moonshotai.kimi-k2.5","bedrock/us-west-2/moonshotai.kimi-k2.5","fireworks_ai/accounts/fireworks/models/kimi-k2p5","fireworks_ai/kimi-k2p5","huggingface-llm-kimi-k2-5","kimi-k2-5","kimi-k2-5-non-reasoning","kimi-k2.5","moonshot-kimi-k2-5","moonshot/kimi-k2.5","moonshotai.kimi-k2.5","moonshotai/kimi-k2.5","moonshotai/Kimi-K2.5","openrouter/moonshotai/kimi-k2.5","together_ai/moonshotai/Kimi-K2.5","wandb/moonshotai/Kimi-K2.5"],"hf_likes":2753,"hf_downloads":5222216,"hf_downloads_all_time":9851195,"hf_trending_score":34,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"moonshot-kimi-k2-5","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.375,"max_input_per_1m":0.6,"min_output_per_1m":2.025,"max_output_per_1m":3.011,"min_cache_read_per_1m":0.1,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":12},"providers":[],"regions":[],"region_info":{}}},{"id":"zhipu-glm-4-7","name":"glm-4-7","display_name":"GLM-4.7","description":"A multilingual MoE LLM from Z AI designed for complex reasoning, agentic coding, and tool use, building on the GLM-4.6 architecture.","creator":"zhipu","family":"glm4_moe","tier":"","version":"4-7","type":"language","size_in_bn":358.338,"modalities":{"input":["image","pdf","text"],"output":["text"]},"context_window":204800,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-12-22","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":13,"ids":["accounts/fireworks/models/glm-4p7","baseten/zai-org/GLM-4.7","cerebras/zai-glm-4.7","fireworks_ai/accounts/fireworks/models/glm-4p7","fireworks_ai/glm-4p7","glm-4-7","glm-4-7-251222","glm-4-7-non-reasoning","glm-4.7","glm-4.7-maas","novita/zai-org/glm-4.7","openrouter/z-ai/glm-4.7","publishers/google/models/glm-4.7-maas","together_ai/zai-org/GLM-4.7","vertex_ai/zai-org/glm-4.7-maas","z-ai/glm-4.7","zai-org/glm-4.7","zai-org/GLM-4.7","zai.glm-4.7","zai/glm-4.7","zhipu-g
lm-4-7"],"hf_likes":2026,"hf_downloads":117151,"hf_downloads_all_time":436300,"hf_trending_score":4,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"zhipu-glm-4-7","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.4,"max_input_per_1m":2.25,"min_output_per_1m":1.75,"max_output_per_1m":2.75,"min_cache_read_per_1m":0.08,"min_cache_write_per_1m":0.06,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":13},"providers":[],"regions":[],"region_info":{}}},{"id":"minimax-m2-1","name":"minimax-m2-1","display_name":"MiniMax M2.1","description":"A refined M2.1 sub-version of MiniMax's MoE language model with improved reasoning, tool-use, and implicit caching for agentic tasks.","creator":"minimax","family":"minimax_m2","tier":"","version":"1","type":"language","size_in_bn":228.704,"modalities":{"input":["image","text"],"output":["text"]},"context_window":1000000,"max_output_tokens":196608,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-12-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["accounts/fireworks/models/minimax-m2p1","bedrock/ap-northeast-1/minimax.minimax-m2.1","bedrock/ap-south-1/minimax.minimax-m2.1","bedrock/ap-southeast-3/minimax.minimax-m2.1","bedrock/eu-central-1/minimax.minimax-m2.1","bedrock/eu-north-1/minimax.minimax-m2.1","bedrock/eu-south-1/minimax.minimax-m2.1","bedrock/eu-west-1/minimax.minimax-m2.1","bedr
ock/eu-west-2/minimax.minimax-m2.1","bedrock/sa-east-1/minimax.minimax-m2.1","bedrock/us-east-1/minimax.minimax-m2.1","bedrock/us-east-2/minimax.minimax-m2.1","bedrock/us-west-2/minimax.minimax-m2.1","fireworks_ai/accounts/fireworks/models/minimax-m2p1","fireworks_ai/minimax-m2p1","gmi/MiniMaxAI/MiniMax-M2.1","huggingface-llm-minimax-m2-1","minimax-m2-1","minimax.minimax-m2.1","minimax/minimax-m2.1","minimax/MiniMax-M2.1","novita/minimax/minimax-m2.1","openrouter/minimax/minimax-m2.1"],"hf_likes":1351,"hf_downloads":35651,"hf_downloads_all_time":408444,"hf_trending_score":3,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"minimax-m2-1","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.29,"max_input_per_1m":0.3,"min_output_per_1m":0.95,"max_output_per_1m":1.2,"min_cache_read_per_1m":0.03,"min_cache_write_per_1m":0.375,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"minimax-m2","name":"minimax-m2","display_name":"MiniMax M2","description":"MiniMax's second-generation MoE language model with reasoning and tool-use capabilities, built for complex agentic and productivity 
workflows.","creator":"minimax","family":"mixtral","tier":"","version":null,"type":"language","size_in_bn":228.704,"modalities":{"input":["text"],"output":["text"]},"context_window":205000,"max_output_tokens":196608,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-10-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":9,"ids":["accounts/fireworks/models/minimax-m2","fireworks_ai/accounts/fireworks/models/minimax-m2","huggingface-llm-minimax-m2","minimax-m2","minimax.minimax-m2","minimax/minimax-m2","minimax/MiniMax-M2","novita/minimax/minimax-m2","openrouter/minimax/minimax-m2"],"hf_likes":1491,"hf_downloads":69357,"hf_downloads_all_time":1925616,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"minimax-m2","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.255,"max_input_per_1m":0.3,"min_output_per_1m":1,"max_output_per_1m":1.2,"min_cache_read_per_1m":0.03,"min_cache_write_per_1m":0.03,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"openai-gpt-oss-120b","name":"gpt-oss-120b","display_name":"GPT OSS 120B","description":"A 120-billion-parameter open-weights GPT model from OpenAI designed for reasoning-intensive tasks with implicit caching 
support.","creator":"openai","family":"gpt_oss","tier":"","version":null,"type":"language","size_in_bn":120,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"GPT","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":true,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-08-05","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":21,"ids":["@cf/openai/gpt-oss-120b","accounts/fireworks/models/gpt-oss-120b","azure_ai/gpt-oss-120b","baseten/openai/gpt-oss-120b","bedrock_mantle/openai.gpt-oss-120b","cerebras/gpt-oss-120b","crusoe/openai/gpt-oss-120b","databricks/databricks-gpt-oss-120b","deepinfra/openai/gpt-oss-120b","fireworks_ai/accounts/fireworks/models/gpt-oss-120b","gpt-oss-120b","gpt-oss-120b-low","gpt-oss-120b-maas","groq/openai/gpt-oss-120b","lemonade/gpt-oss-120b-mxfp-GGUF","novita/openai/gpt-oss-120b","ollama/gpt-oss:120b-cloud","openai-gpt-oss-120b","openai-reasoning-gpt-oss-120b","openai.gpt-oss-120b-1:0","openai/gpt-oss-120b","openai/gpt-oss-120b:free","openrouter/openai/gpt-oss-120b","ovhcloud/gpt-oss-120b","publishers/google/models/gpt-oss-120b-maas","replicate/openai/gpt-oss-120b","sambanova/gpt-oss-120b","together_ai/openai/gpt-oss-120b","vertex_ai/openai/gpt-oss-120b-maas","wandb/openai/gpt-oss-120b","watsonx/openai/gpt-oss-120b"],"hf_likes":4719,"hf_downloads":3524674,"hf_downloads_all_time":32348365,"hf_trending_score":25,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"openai-gpt-oss-120b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.039,"max_input_per_1m":15,"min_output_per_1m":0.18,"max_output_per_1m":60,"min_cache_read_per_1m":0.075,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":21},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-v3-2","name":"v3-2","display_name":"DeepSeek V3.2","description":"DeepSeek's V3.2 MoE LLM featuring implicit caching support and improved tool-use capabilities over the V3.1 generation.","creator":"deepseek","family":"deepseek-v3","tier":"","version":"3.2","type":"language","size_in_bn":685.397,"modalities":{"input":["image","pdf","text"],"output":["text"]},"context_window":163840,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-12-01","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":12,"ids":["accounts/fireworks/models/deepseek-v3p2","azure_ai/deepseek-v3.2","bedrock/ap-northeast-1/deepseek.v3.2","bedrock/ap-south-1/deepseek.v3.2","bedrock/ap-southeast-3/deepseek.v3.2","bedrock/eu-north-1/deepseek.v3.2","bedrock/sa-east-1/deepseek.v3.2","bedrock/us-east-1/deepseek.v3.2","bedrock/us-east-2/deepseek.v3.2","bedrock/us-west-2/deepseek.v3.2","deepseek-ai/DeepSeek-V3.2","deepseek-llm-deepseek-v3-2","deepseek-v3-2","deepseek-v3-2-251201","deepseek-v3-2-reasoning","deep
seek-v3.2-maas","deepseek-v3.2685","deepseek.v3.2","deepseek/deepseek-v3.2","eu.deepseek.v3.2","fireworks_ai/accounts/fireworks/models/deepseek-v3p2","gmi/deepseek-ai/DeepSeek-V3.2","novita/deepseek/deepseek-v3.2","openrouter/deepseek/deepseek-v3.2","publishers/google/models/deepseek-v3.2-maas","us.deepseek.v3.2","vertex_ai/deepseek-ai/deepseek-v3.2-maas"],"hf_likes":1413,"hf_downloads":10366446,"hf_downloads_all_time":11229842,"hf_trending_score":6,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-v3-2","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.2288,"max_input_per_1m":0.62,"min_output_per_1m":0.3432,"max_output_per_1m":1.85,"min_cache_read_per_1m":0.028,"min_cache_write_per_1m":0.056,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":12},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-v3-1-terminus","name":"deepseek-v3-1-terminus","display_name":"DeepSeek V3.1 Terminus","description":"An update to DeepSeek V3.1 that addresses language consistency and agent capability issues while preserving the model's core 
performance.","creator":"deepseek","family":"deepseek-v3","tier":"terminus","version":"3.1","type":"language","size_in_bn":684.531,"modalities":{"input":["text"],"output":["text"]},"context_window":163840,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-03-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-09-22","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["accounts/fireworks/models/deepseek-v3p1-terminus","deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus","deepseek-ai/DeepSeek-V3.1-Terminus","deepseek-v3-1-terminus","deepseek-v3-1-terminus-reasoning","deepseek/deepseek-v3.1-terminus","fireworks_ai/accounts/fireworks/models/deepseek-v3p1-terminus","novita/deepseek/deepseek-v3.1-terminus"],"hf_likes":363,"hf_downloads":3879,"hf_downloads_all_time":180017,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-v3-1-terminus","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.27,"max_input_per_1m":0.56,"min_output_per_1m":0.95,"max_output_per_1m":1.68,"min_cache_read_per_1m":0.13,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","huggingface","novita","openrouter","vercel_ai_gateway"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"zhipu-glm-4-5","name":"glm-4-5","display_name":"GLM-4.5","description":"A 355B MoE foundation LLM from Z AI with 32B active parameters, designed for 
intelligent agents with strong reasoning and tool-use capabilities.","creator":"zhipu","family":"glm4_moe","tier":"","version":"4-5","type":"language","size_in_bn":358.338,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":98304,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-12-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-07-25","earliest_deprecation_date":"2026-06-19","deprecated":false,"has_pricing":true,"provider_count":8,"ids":["accounts/fireworks/models/glm-4p5","deepinfra/zai-org/GLM-4.5","fireworks_ai/accounts/fireworks/models/glm-4p5","glm-4.5","novita/zai-org/glm-4.5","vercel_ai_gateway/zai/glm-4.5","wandb/zai-org/GLM-4.5","z-ai/glm-4.5","zai-org/glm-4.5","zai/glm-4.5","zhipu-glm-4-5"],"hf_likes":1398,"hf_downloads":70876,"hf_downloads_all_time":400488,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"zhipu-glm-4-5","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.4,"max_input_per_1m":55,"min_output_per_1m":1.6,"max_output_per_1m":200,"min_cache_read_per_1m":0.11,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-coder-480b-a35b-instruct","name":"qwen3-coder-480b-a35b-instruct","display_name":"Qwen3 Coder 480B A35B Instruct","description":"Qwen3's flagship agentic code model with 480B total and 35B activated parameters, excelling at 
autonomous programming, tool calling, and browser-use tasks.","creator":"alibaba","family":"qwen3_moe","tier":"","version":null,"type":"language","size_in_bn":480,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["accounts/fireworks/models/qwen3-coder-480b-a35b-instruct","alibaba-qwen3-coder-480b-a35b-instruct","deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct","fireworks_ai/accounts/fireworks/models/qwen3-coder-480b-a35b-instruct","novita/qwen/qwen3-coder-480b-a35b-instruct","qwen/qwen3-coder-480b-a35b-instruct","Qwen/Qwen3-Coder-480B-A35B-Instruct","qwen3-coder-480b-a35b-instruct","wandb/Qwen/Qwen3-Coder-480B-A35B-Instruct"],"hf_likes":1325,"hf_downloads":57687,"hf_downloads_all_time":885858,"hf_trending_score":0.5,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-coder-480b-a35b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.22,"max_input_per_1m":100,"min_output_per_1m":1.3,"max_output_per_1m":150,"min_cache_read_per_1m":null,"min_cache_write_per_1m":0.022,"min_reasoning_per_1m":null,"cheapest_providers":["google_gemini","google_vertex_ai"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"openai-gpt-oss-20b","name":"gpt-oss-20b","display_name":"GPT OSS 
20B","description":"A 20-billion-parameter open-weights GPT model from OpenAI suited for reasoning and tool-use tasks at a smaller, more efficient scale.","creator":"openai","family":"gpt_oss","tier":"","version":null,"type":"language","size_in_bn":20,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"GPT","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":true,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-08-05","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":16,"ids":["@cf/openai/gpt-oss-20b","accounts/fireworks/models/gpt-oss-20b","bedrock_mantle/openai.gpt-oss-20b","databricks/databricks-gpt-oss-20b","deepinfra/openai/gpt-oss-20b","fireworks_ai/accounts/fireworks/models/gpt-oss-20b","gpt-oss-20b","gpt-oss-20b-low","gpt-oss-20b-maas","groq/openai/gpt-oss-20b","lemonade/gpt-oss-20b-mxfp4-GGUF","novita/openai/gpt-oss-20b","ollama/gpt-oss:20b-cloud","openai-gpt-oss-20b","openai-reasoning-gpt-oss-20b","openai.gpt-oss-20b-1:0","openai/gpt-oss-20b","openai/gpt-oss-20b:free","openrouter/openai/gpt-oss-20b","ovhcloud/gpt-oss-20b","publishers/google/models/gpt-oss-20b-maas","replicate/openai/gpt-oss-20b","together_ai/openai/gpt-oss-20b","vertex_ai/openai/gpt-oss-20b-maas","wandb/openai/gpt-oss-20b"],"hf_likes":4552,"hf_downloads":6455272,"hf_downloads_all_time":59707566,"hf_trending_score":12,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"openai-gpt-oss-20b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.029,"max_input_per_1m":5,"min_output_per_1m":0.14,"max_output_per_1m":20,"min_cache_read_per_1m":0.0375,"min_cache_write_per_1m":0.007,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":16},"providers":[],"regions":[],"region_info":{}}},{"id":"minimax-m1-80k","name":"minimax-m1-80k","display_name":"MiniMax M1 80k","description":"The 80K-thinking-budget variant of MiniMax's open-weight hybrid MoE reasoning model, with a 1M-token context window for extended document and multi-turn tasks.","creator":"minimax","family":"m1","tier":"","version":null,"type":"language","size_in_bn":456.09,"modalities":{"input":["text"],"output":["text"]},"context_window":1000000,"max_output_tokens":40000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/minimax-m1-80k","fireworks_ai/accounts/fireworks/models/minimax-m1-80k","minimax-m1-80k","minimaxai/minimax-m1-80k","novita/minimaxai/minimax-m1-80k"],"hf_likes":691,"hf_downloads":743,"hf_downloads_all_time":101977,"hf_trending_score":0,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"minimax-m1-80k","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.55,"min_output_per_1m":0.1,"max_output_per_1m":2.2,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["fireworks_ai"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"zhipu-glm-4-5-air","name":"glm-4-5-air","display_name":"GLM-4.5 Air","description":"A compact MoE variant of GLM-4.5 from Z AI, offering a lighter architecture while retaining strong agentic reasoning and tool-use performance.","creator":"zhipu","family":"glm4_moe","tier":"air","version":"4-5","type":"language","size_in_bn":110.469,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":131070,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-12-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-07-25","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["accounts/fireworks/models/glm-4p5-air","fireworks_ai/accounts/fireworks/models/glm-4p5-air","glm-4-5-air","novita/zai-org/glm-4.5-air","vercel_ai_gateway/zai/glm-4.5-air","z-ai/glm-4.5-air","z-ai/glm-4.5-air:free","zai-org/glm-4.5-air","zai/glm-4.5-air","zhipu-glm-4-5-air"],"hf_likes":599,"hf_downloads":389697,"hf_downloads_all_time":3025118,"hf_trending_score":2,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"zhipu-glm-4-5-air","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.125,"max_input_per_1m":0.22,"min_output_per_1m":0.85,"max_output_per_1m":1.1,"min_cache_read_per_1m":0.03,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"mistral-large-3","name":"mistral-large-3","display_name":"Mistral Large 3","description":"Mistral AI's third-generation flagship model, a multimodal Mixture-of-Experts architecture with 675B total parameters and 41B active parameters.","creator":"mistral","family":"mistral3","tier":"","version":"3","type":"language","size_in_bn":null,"modalities":{"input":["image"],"output":["text"]},"context_window":262144,"max_output_tokens":8191,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-12-02","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/mistral-large-3-fp8","azure_ai/mistral-large-3","fireworks_ai/accounts/fireworks/models/mistral-large-3-fp8","mistral-large-3","mistral/mistral-large-3"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"mistral-large-3","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.5,"max_input_per_1m":1.2,"min_output_per_1m":1.2,"max_output_per_1m":1.8,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["azure_aifoundry","mistral","vercel_ai_gateway"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-v3-324","name":"deepseek-v3-324","display_name":"DeepSeek V3 324","description":"The March 2025 update of DeepSeek V3 (V3-0324), a 671B MoE LLM representing an improved iteration over the original V3 release.","creator":"deepseek","family":"deepseek-v3","tier":"","version":"3.0","type":"language","size_in_bn":684.531,"modalities":{"input":["text"],"output":["text"]},"context_window":163840,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":13,"ids":["accounts/fireworks/models/deepseek-v3-0324","azure_ai/deepseek-v3-0324","baseten/deepseek-ai/DeepSeek-V3-0324","crusoe/deepseek-ai/DeepSeek-V3-0324","deepinfra/deepseek-ai/DeepSeek-V3-0324","deepseek-ai/DeepSeek-V3-0324","deepseek-v3-0324","deepseek-v3-324","deepseek/deepseek-v3-0324","fireworks_ai/accounts/fireworks/models/deepseek-v3-0324","gmi/deepseek-ai/DeepSeek-V3-0324","hyperbolic/deepseek-ai/DeepSeek-V3-0324","lambda_ai/deepseek-v3-0324","
nebius/deepseek-ai/DeepSeek-V3-0324","novita/deepseek/deepseek-v3-0324","sambanova/DeepSeek-V3-0324","wandb/deepseek-ai/DeepSeek-V3-0324"],"hf_likes":3101,"hf_downloads":617973,"hf_downloads_all_time":4563386,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-v3-324","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.2,"max_input_per_1m":114,"min_output_per_1m":0.4,"max_output_per_1m":275,"min_cache_read_per_1m":0.135,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["lambda"],"provider_count":13},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-vl-235b-a22b-instruct","name":"qwen3-vl-235b-a22b-instruct","display_name":"Qwen3 VL 235B A22B Instruct","description":"The flagship instruction-tuned vision-language MoE model in the Qwen3 series, with 235B total and 22B activated parameters for superior visual perception and 
reasoning.","creator":"alibaba","family":"qwen3_vl_moe","tier":"","version":null,"type":"language","size_in_bn":235,"modalities":{"input":["image","pdf","text"],"output":["text"]},"context_window":262144,"max_output_tokens":129024,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-03-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-09-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":7,"ids":["accounts/fireworks/models/qwen3-vl-235b-a22b-instruct","alibaba-qwen3-vl-235b-a22b-instruct","alibaba/qwen3-vl-235b-a22b-instruct","dashscope/qwen3-vl-235b-a22b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-vl-235b-a22b-instruct","gmi/Qwen/Qwen3-VL-235B-A22B-Instruct-FP8","novita/qwen/qwen3-vl-235b-a22b-instruct","qwen/qwen3-vl-235b-a22b-instruct","Qwen/Qwen3-VL-235B-A22B-Instruct","qwen3-vl-235b-a22b-instruct"],"hf_likes":383,"hf_downloads":947793,"hf_downloads_all_time":2172030,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-vl-235b-a22b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.2,"max_input_per_1m":0.4,"min_output_per_1m":0.88,"max_output_per_1m":1.6,"min_cache_read_per_1m":0.11,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":7},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-next-80b-a3b-instruct","name":"qwen3-next-80b-a3b-instruct","display_name":"Qwen3 Next 80B A3B 
Instruct","description":"An instruction-tuned Qwen3 Next MoE model with 80B total and 3B activated parameters, optimized for text generation and conversational tasks.","creator":"alibaba","family":"qwen3_next","tier":"","version":null,"type":"language","size_in_bn":80,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-09-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-09-11","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":10,"ids":["accounts/fireworks/models/qwen3-next-80b-a3b-instruct","alibaba-qwen3-next-80b-a3b-instruct","alibaba/qwen3-next-80b-a3b-instruct","dashscope/qwen3-next-80b-a3b-instruct","deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct","fireworks_ai/accounts/fireworks/models/qwen3-next-80b-a3b-instruct","huggingface-reasoning-qwen3-next-80b-a3b-instruct","novita/qwen/qwen3-next-80b-a3b-instruct","qwen/qwen3-next-80b-a3b-instruct","Qwen/Qwen3-Next-80B-A3B-Instruct","qwen/qwen3-next-80b-a3b-instruct:free","qwen3-next-80b-a3b-instruct","together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct"],"hf_likes":1012,"hf_downloads":292470,"hf_downloads_all_time":14864763,"hf_trending_score":4,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"alibaba-qwen3-next-80b-a3b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.09,"max_input_per_1m":0.9,"min_output_per_1m":0.9,"max_output_per_1m":1.5,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":10},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-coder-30b-a3b-instruct","name":"qwen3-coder-30b-a3b-instruct","display_name":"Qwen3 Coder 30B A3B Instruct","description":"An instruction-tuned Qwen3 MoE coding model with 30B total and 3B active parameters, designed for agentic programming tasks and tool-integrated workflows.","creator":"alibaba","family":"qwen3_moe","tier":"","version":null,"type":"language","size_in_bn":30,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-07-31","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/qwen3-coder-30b-a3b-instruct","alibaba-qwen3-coder-30b-a3b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-coder-30b-a3b-instruct","huggingface-reasoning-qwen3-coder-30b-a3b-instruct","lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF","novita/qwen/qwen3-coder-30b-a3b-instruct","qwen/qwen3-coder-30b-a3b-instruct","qwen3-coder-30b-a3b-instruct"],"hf_likes":1018,
"hf_downloads":2406663,"hf_downloads_all_time":7798034,"hf_trending_score":10,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-coder-30b-a3b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.07,"max_input_per_1m":0.45,"min_output_per_1m":0.26,"max_output_per_1m":2.25,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","novita","openrouter"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwq-32b","name":"qwq-32b","display_name":"QwQ 32B","description":"A 32B reasoning-focused LLM from Alibaba's Qwen team, designed to match frontier closed-model performance on complex reasoning and problem-solving tasks.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-03-05","earliest_deprecation_date":"2026-04-29","deprecated":false,"has_pricing":true,"provider_count":9,"ids":["@cf/qwen/qwq-32b","accounts/fireworks/models/qwq-32b","alibaba-qwq-32b","deepinfra/Qwen/QwQ-32B","fireworks_ai/accounts/fireworks/models/qwq-32b","huggingface-llm-qwq-32b","hyperbolic/Qwen/QwQ-32B","nebius/Qwen/QwQ-32B","nscale/Qwen/QwQ-32B","qwen/qwq-32b","qwq-32b","sambanova/QwQ-32B"],"hf_likes":2906,"hf_down
loads":73024,"hf_downloads_all_time":2875672,"hf_trending_score":1,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwq-32b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.15,"max_input_per_1m":0.9,"min_output_per_1m":0.2,"max_output_per_1m":1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","nebius"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1","name":"r1","display_name":"DeepSeek R1","description":"DeepSeek's flagship reasoning-focused LLM with strong performance in mathematics, coding, and logical inference, comparable to leading closed-source models.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":684.531,"modalities":{"input":["text"],"output":["text"]},"context_window":163840,"max_output_tokens":65536,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-01-20","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":14,"ids":["accounts/fireworks/models/deepseek-r1","azure_ai/deepseek-r1","deepinfra/deepseek-ai/DeepSeek-R1","deepseek-llm-r1","deepseek-r1","deepseek-r1-0120","deepseek-r1-qwen3-8b","deepseek-r1685","deepseek-reasoner","deepseek.r1-v1:0","deepseek/deepseek-r1","deepseek/deepseek-reasoner","fireworks_ai/accounts/fireworks/models/deepseek-r1","hyperbolic/deepseek-ai/Dee
pSeek-R1","nebius/deepseek-ai/DeepSeek-R1","openrouter/deepseek/deepseek-r1","replicate/deepseek-ai/deepseek-r1","sambanova/DeepSeek-R1","snowflake/deepseek-r1","together_ai/deepseek-ai/DeepSeek-R1","us.deepseek.r1-v1:0","vercel_ai_gateway/deepseek/deepseek-r1"],"hf_likes":13288,"hf_downloads":4020320,"hf_downloads_all_time":20204112,"hf_trending_score":20,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-r1","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.28,"max_input_per_1m":5,"min_output_per_1m":0.4,"max_output_per_1m":10,"min_cache_read_per_1m":0.028,"min_cache_write_per_1m":null,"min_reasoning_per_1m":10,"cheapest_providers":["deepseek"],"provider_count":14},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-405b-instruct","name":"llama-3-1-405b-instruct","display_name":"Llama 3.1 405B Instruct","description":"Meta's 405B instruction-tuned LLM optimized for following complex instructions, with FP8 quantization for efficient large-scale 
inference.","creator":"meta","family":"llama","tier":"","version":"3-1","type":"language","size_in_bn":405,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2024-07-23","earliest_deprecation_date":"2026-07","deprecated":false,"has_pricing":true,"provider_count":11,"ids":["accounts/fireworks/models/llama-v3p1-405b-instruct","azure_ai/Meta-Llama-3.1-405B-Instruct","databricks/databricks-meta-llama-3-1-405b-instruct","fireworks_ai/accounts/fireworks/models/llama-v3p1-405b-instruct","hyperbolic/meta-llama/Meta-Llama-3.1-405B-Instruct","lambda_ai/llama3.1-405b-instruct-fp8","llama-3-1-instruct-405b","meta-llama-3-1-405b-instruct","meta-textgeneration-llama-3-1-405b-instruct-fp8","meta.llama3-1-405b-instruct-v1:0","nebius/meta-llama/Meta-Llama-3.1-405B-Instruct","oci/meta.llama-3.1-405b-instruct","sambanova/Meta-Llama-3.1-405B-Instruct","together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo","us.meta.llama3-1-405b-instruct-v1:0","vertex_ai/meta/llama-3.1-405b-instruct-maas"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"meta-llama-3-1-405b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.12,"max_input_per_1m":10.68,"min_output_per_1m":0.3,"max_output_per_1m":16,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["hyperbolic"],"provider_count":11},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-vl-32b-instruct","name":"qwen3-vl-32b-instruct","display_name":"Qwen3 VL 32B Instruct","description":"A 32B dense vision-language model from the Qwen3 series with significantly enhanced text understanding, visual perception, and multimodal reasoning capabilities.","creator":"alibaba","family":"qwen3_vl","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["image","text"],"output":["text"]},"context_window":262144,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-10-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/qwen3-vl-32b-instruct","alibaba-qwen3-vl-32b-instruct","dashscope/qwen3-vl-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-vl-32b-instruct","qwen/qwen3-vl-32b-instruct","qwen3-vl-32b-instruct"],"hf_likes":202,"hf_downloads":2356383,"hf_downloads_all_time":8776345,"hf_trending_score":0,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"alibaba-qwen3-vl-32b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.104,"max_input_per_1m":0.9,"min_output_per_1m":0.416,"max_output_per_1m":0.9,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1-distill-qwen-32b","name":"deepseek-r1-distill-qwen-32b","display_name":"DeepSeek R1 Distill Qwen 32B","description":"A 32B Qwen-based model distilled from DeepSeek R1's reasoning capabilities, offering high-quality chain-of-thought performance at a mid-scale parameter count.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-01-29","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["@cf/deepseek-ai/deepseek-r1-distill-qwen-32b","accounts/fireworks/models/deepseek-r1-distill-qwen-32b","deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B","deepseek-llm-r1-distill-qwen-32b","deepseek-r1-distill-qwen-32b","deepseek/deepseek-r1-distill-qwen-32b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-32b","novita/deepseek/deepseek-r1-distill-qwen-32b","nscale
/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"],"hf_likes":1545,"hf_downloads":1046750,"hf_downloads_all_time":23929632,"hf_trending_score":3,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-r1-distill-qwen-32b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.15,"max_input_per_1m":0.9,"min_output_per_1m":0.15,"max_output_per_1m":4.881,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-235b-a22b-instruct","name":"qwen3-235b-a22b-instruct","display_name":"Qwen3 235B A22B Instruct","description":"An instruction-tuned update of the Qwen3 235B A22B MoE model with significant improvements in instruction following, logical reasoning, and general capabilities.","creator":"alibaba","family":"qwen3_moe","tier":"","version":null,"type":"language","size_in_bn":235,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":10,"ids":["accounts/fireworks/models/qwen3-235b-a22b-instruct-2507","alibaba-qwen3-235b-a22b-instruct","crusoe/Qwen/Qwen3-235B-A22B-Instruct-2507","deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507","fireworks_ai/accounts/fireworks/models/qwen3-235b-a22b-instruct-2507","no
vita/qwen/qwen3-235b-a22b-instruct-2507","qwen/qwen3-235b-a22b-instruct-2507","Qwen/Qwen3-235B-A22B-Instruct-2507","qwen3-235b-a22b-instruct","qwen3-235b-a22b-instruct-2507","replicate/qwen/qwen3-235b-a22b-instruct-2507","wandb/Qwen/Qwen3-235B-A22B-Instruct-2507"],"hf_likes":773,"hf_downloads":150781,"hf_downloads_all_time":1182969,"hf_trending_score":1,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-235b-a22b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.09,"max_input_per_1m":10,"min_output_per_1m":0.58,"max_output_per_1m":10,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","huggingface","novita"],"provider_count":10},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-v3","name":"v3","display_name":"DeepSeek V3","description":"DeepSeek's third-generation MoE LLM with 671B total parameters (37B activated per token), excelling at coding, reasoning, and tool 
use.","creator":"deepseek","family":"deepseek-v3","tier":"","version":"3.0","type":"language","size_in_bn":684.531,"modalities":{"input":["image","text"],"output":["text"]},"context_window":163840,"max_output_tokens":81920,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"DeepSeek","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-12-26","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":11,"ids":["accounts/fireworks/models/deepseek-v3","azure_ai/deepseek-v3","deepinfra/deepseek-ai/DeepSeek-V3","deepseek-ai/DeepSeek-V3","deepseek-chat","deepseek-v3","deepseek.v3-v1:0","deepseek/deepseek_v3","deepseek/deepseek-chat","deepseek/deepseek-v3","fireworks_ai/accounts/fireworks/models/deepseek-v3","hyperbolic/deepseek-ai/DeepSeek-V3","nebius/deepseek-ai/DeepSeek-V3","openrouter/deepseek/deepseek-chat","replicate/deepseek-ai/deepseek-v3","together_ai/deepseek-ai/DeepSeek-V3","vercel_ai_gateway/deepseek/deepseek-v3"],"hf_likes":4056,"hf_downloads":882009,"hf_downloads_all_time":15156328,"hf_trending_score":4,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"deepseek-v3","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.2,"max_input_per_1m":1.45,"min_output_per_1m":0.2,"max_output_per_1m":4.56,"min_cache_read_per_1m":0.028,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["hyperbolic"],"provider_count":11},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-vl-30b-a3b-instruct","name":"qwen3-vl-30b-a3b-instruct","display_name":"Qwen3 VL 30B A3B Instruct","description":"An instruction-tuned vision-language MoE model with 30B total and 3B activated parameters, offering strong multimodal understanding and generation capabilities.","creator":"alibaba","family":"qwen3_vl_moe","tier":"","version":null,"type":"language","size_in_bn":30,"modalities":{"input":["image","text"],"output":["text"]},"context_window":262144,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-03-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-10-06","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/qwen3-vl-30b-a3b-instruct","alibaba-qwen3-vl-30b-a3b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-vl-30b-a3b-instruct","novita/qwen/qwen3-vl-30b-a3b-instruct","qwen/qwen3-vl-30b-a3b-instruct","Qwen/Qwen3-VL-30B-A3B-Instruct","qwen3-vl-30b-a3b-instruct"],"hf_likes":562,"hf_downloads":2219395,"hf_downloads_all_time":14070852,"hf_trending_score":2,"updated_at":"2026-0
6-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-vl-30b-a3b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.13,"max_input_per_1m":0.2,"min_output_per_1m":0.52,"max_output_per_1m":0.8,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1-distill-qwen-14b","name":"deepseek-r1-distill-qwen-14b","display_name":"DeepSeek R1 Distill Qwen 14B","description":"A 14B Qwen-based model distilled from DeepSeek R1, balancing strong reasoning performance with moderate computational requirements.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":14,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/deepseek-r1-distill-qwen-14b","deepseek-llm-r1-distill-qwen-14b","deepseek-r1-distill-qwen-14b","deepseek/deepseek-r1-distill-qwen-14b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-14b","novita/deepseek/deepseek-r1-distill-qwen-14b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"u
pdated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-r1-distill-qwen-14b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.07,"max_input_per_1m":0.2,"min_output_per_1m":0.07,"max_output_per_1m":0.431,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-5-72b-instruct","name":"qwen2-5-72b-instruct","display_name":"Qwen2.5 72B Instruct","description":"A 72-billion-parameter instruction-tuned LLM from Alibaba's Qwen2.5 series, excelling at natural language understanding, summarization, and dialogue.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":72,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-09-19","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["accounts/fireworks/models/qwen2p5-72b-instruct","alibaba-qwen2-5-72b-instruct","deepinfra/Qwen/Qwen2.5-72B-Instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-72b-instruct","huggingface-llm-qwen2-5-72b-instruct","hyperbolic/Qwen/Qwen2.5-72B-Instruct","nebius/Qwen/Qwen2.5-72B-Instruct","novita/qwen/qwen-2.5-72b-instruct","qwen/qwen-2.5-72b-instruct","Qwen/Qwen2.5-72B-Instruct","qwen2-5-72b-instruct","q
wen2.5-72b-instruct"],"hf_likes":927,"hf_downloads":457915,"hf_downloads_all_time":5817981,"hf_trending_score":1,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen2-5-72b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.12,"max_input_per_1m":1.4,"min_output_per_1m":0.3,"max_output_per_1m":5.6,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","hyperbolic"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwq-32b-preview","name":"qwq-32b-preview","display_name":"QwQ 32B Preview","description":"An experimental 32B reasoning LLM from Alibaba's Qwen team, showcasing open-model capabilities comparable to leading closed frontier models on reasoning benchmarks.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":32768,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["accounts/fireworks/models/qwen-qwq-32b-preview","alibaba-qwq-32b-preview","fireworks_ai/accounts/fireworks/models/qwen-qwq-32b-preview","qwq-32b-preview","QwQ-32B-Preview"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"alibaba-qwq-32b-preview","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.287,"max_input_per_1m":0.9,"min_output_per_1m":0.861,"max_output_per_1m":0.9,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["alibaba_qwen"],"provider_count":2},"providers":[],"regions":[],"region_info":{}}},{"id":"mistral-devstral-small","name":"devstral-small","display_name":"Devstral Small","description":"Mistral AI's lightweight agentic coding model built with All Hands AI, specializing in tool-use, codebase exploration, and multi-file editing.","creator":"mistral","family":"mistral","tier":"small","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":256000,"max_output_tokens":64000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-03-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Mistral","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":true,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-05-07","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/devstral-small-2505","devstral-small","devstral-small-2505","fireworks_ai/accounts/fireworks/models/devstral-small-2505","mistral-devstral-small","mistral/devstral-small","mistral/devstral-small-2505","mistral/devstral-small-2507","mistral/devstral-small-latest","mistral/labs-devstral-small-2512","mistralai/devstral-small","vercel_ai_gateway/mistral/devstral-small"],"hf_likes":null,"hf_downloads":null,"hf_downloads_
all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"mistral-devstral-small","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.9,"min_output_per_1m":0.3,"max_output_per_1m":0.9,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["mistral","vercel_ai_gateway"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-3-70b-instruct","name":"llama-3-3-70b-instruct","display_name":"Llama 3.3 70B Instruct","description":"Meta's 70B instruction-tuned LLM from Llama 3.3, optimized for complex instruction-following and deployed across multiple cloud regions.","creator":"meta","family":"llama","tier":"","version":"3-3","type":"language","size_in_bn":70,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":120000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-12-06","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":20,"ids":["accounts/fireworks/models/llama-v3p3-70b-instruct","azure_ai/Llama-3.3-70B-Instruct","crusoe/meta-llama/Llama-3.3-70B-Instruct","databricks/databricks-meta-llama-3-3-70b-instruct","deepinfra/meta-llama/Llama-3.3-70B-Instruct","fireworks_ai/accounts/fireworks/models/llama-v3p3-70b-instruct","gradient_ai/llama3.3-70b-instruct","groq/llama-3.3-70b-versatile","hyperboli
c/meta-llama/Llama-3.3-70B-Instruct","lambda_ai/llama3.3-70b-instruct-fp8","llama-3-3-instruct-70b","llama-3.3-70b-instruct-maas","meta_llama/Llama-3.3-70B-Instruct","meta-llama-3-3-70b-instruct","meta-llama/llama-3.3-70b-instruct","meta-llama/llama-3.3-70b-instruct:free","meta-textgeneration-llama-3-3-70b-instruct","meta.llama3-3-70b-instruct-v1:0","meta.llama3-3-70b-instruct-v1:0:128k","nebius/meta-llama/Llama-3.3-70B-Instruct","novita/meta-llama/llama-3.3-70b-instruct","nscale/meta-llama/Llama-3.3-70B-Instruct","oci/meta.llama-3.3-70b-instruct","oci/meta.llama-3.3-70b-instruct-fp8-dynamic","ovhcloud/Meta-Llama-3_3-70B-Instruct","publishers/google/models/llama-3.3-70b-instruct-maas","publishers/meta/models/llama-3.3-70b-instruct-maas","sambanova/Meta-Llama-3.3-70B-Instruct","together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free","us.meta.llama3-3-70b-instruct-v1:0","wandb/meta-llama/Llama-3.3-70B-Instruct","watsonx/meta-llama/llama-3-3-70b-instruct"],"hf_likes":2731,"hf_downloads":496024,"hf_downloads_all_time":10779619,"hf_trending_score":1.5,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-3-3-70b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":71,"min_output_per_1m":0.2,"max_output_per_1m":71,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":20},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-vl-8b-instruct","name":"qwen3-vl-8b-instruct","display_name":"Qwen3 VL 8B Instruct","description":"An instruction-tuned 8B vision-language model from the Qwen3 series, optimized for conversational multimodal tasks involving text and image 
inputs.","creator":"alibaba","family":"qwen3_vl","tier":"","version":null,"type":"language","size_in_bn":8,"modalities":{"input":["image","text"],"output":["text"]},"context_window":256000,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-10-14","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["accounts/fireworks/models/qwen3-vl-8b-instruct","alibaba-qwen3-vl-8b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-vl-8b-instruct","huggingface-vlm-qwen3-vl-8b-instruct","novita/qwen/qwen3-vl-8b-instruct","qwen/qwen3-vl-8b-instruct","qwen3-vl-8b-instruct"],"hf_likes":874,"hf_downloads":3765920,"hf_downloads_all_time":23111974,"hf_trending_score":15,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-vl-8b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.08,"max_input_per_1m":0.2,"min_output_per_1m":0.2,"max_output_per_1m":0.7,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","novita","openrouter"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-5-32b-instruct","name":"qwen2-5-32b-instruct","display_name":"Qwen2.5 32B Instruct","description":"A 32-billion-parameter instruction-tuned LLM from Alibaba's Qwen2.5 series, optimized for following complex instructions and text generation 
tasks.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/qwen2p5-32b-instruct","alibaba-qwen2-5-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-32b-instruct","huggingface-llm-qwen2-5-32b-instruct","nebius/Qwen/Qwen2.5-32B-Instruct","qwen2.5-32b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen2-5-32b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.06,"max_input_per_1m":0.9,"min_output_per_1m":0.2,"max_output_per_1m":2.8,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nebius"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-5-coder-32b-instruct","name":"qwen2-5-coder-32b-instruct","display_name":"Qwen2.5 Coder 32B Instruct","description":"A 32-billion-parameter instruction-tuned code LLM from Alibaba's Qwen2.5-Coder series, excelling at code generation, debugging, and explanation across many programming 
languages.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2024-11-11","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":9,"ids":["@cf/qwen/qwen2.5-coder-32b-instruct","accounts/fireworks/models/qwen2p5-coder-32b-instruct","accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k","accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope","accounts/fireworks/models/qwen2p5-coder-32b-instruct-64k","alibaba-qwen2-5-coder-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-64k","huggingface-llm-qwen2-5-coder-32b-instruct","hyperbolic/Qwen/Qwen2.5-Coder-32B-Instruct","lambda_ai/qwen25-coder-32b-instruct","nscale/Qwen/Qwen2.5-Coder-32B-Instruct","openrouter/qwen/qwen-2.5-coder-32b-instruct","ovhcloud/Qwen2.5-Coder-32B-Instruct","qwen/qwen-2.5-coder-32b-instruct","qwen2-5-coder-32b-instruct","qwen2.5-coder-32b-instruct"],"hf_likes":2008,"hf_downloads":1257495,"hf_downloads_all_time":5998607,"hf_trending_score":0,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"alibaba-qwen2-5-coder-32b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":0.9,"min_output_per_1m":0.1,"max_output_per_1m":1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["lambda"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"zhipu-glm-4-5v","name":"glm-4-5v","display_name":"GLM-4.5V","description":"A multimodal MoE vision-language model from Z AI based on GLM-4.5 Air, delivering strong visual reasoning and tool-use performance.","creator":"zhipu","family":"glm4v_moe","tier":"","version":"4-5v","type":"language","size_in_bn":107.711,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":32000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-12-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-08-11","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["accounts/fireworks/models/glm-4p5v","fireworks_ai/accounts/fireworks/models/glm-4p5v","glm-4-5v","glm-4-5v-reasoning","novita/zai-org/glm-4.5v","z-ai/glm-4.5v","zai-org/glm-4.5v","zai/glm-4.5v","zhipu-glm-4-5v"],"hf_likes":717,"hf_downloads":44600,"hf_downloads_all_time":417587,"hf_trending_score":2,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"zhipu-glm-4-5v","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.6,"max_input_per_1m":1.2,"min_output_per_1m":1.2,"max_output_per_1m":1.8,"min_cache_read_per_1m":0.11,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","novita","openrouter","vercel_ai_gateway","z_ai"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-30b-a3b-instruct","name":"qwen3-30b-a3b-instruct","display_name":"Qwen3 30B A3B Instruct","description":"An instruction-tuned Qwen3 MoE model with 30B total and 3B active parameters, optimized for text generation and instruction-following tasks.","creator":"alibaba","family":"qwen3_moe","tier":"","version":null,"type":"language","size_in_bn":30,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2025-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen3","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-07-29","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/qwen3-30b-a3b-instruct-2507","alibaba-qwen3-30b-a3b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-30b-a3b-instruct-2507","huggingface-reasoning-qwen3-30b-a3b-instruct-2507","qwen/qwen3-30b-a3b-instruct-2507","qwen3-30b-a3b-instruct","qwen3-30b-a3b-instruct-2507"],"hf_likes":801,"hf_downloads":983022,"hf_downloads_all_time":10380022,"hf_trending_score":2,"u
pdated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen3-30b-a3b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.04815,"max_input_per_1m":0.5,"min_output_per_1m":0.19305,"max_output_per_1m":0.8,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-4b-instruct","name":"qwen3-4b-instruct","display_name":"Qwen3 4B Instruct","description":"An instruction-tuned 4B Qwen3 model offering efficient text generation and reasoning in a small parameter footprint.","creator":"alibaba","family":"qwen3","tier":"","version":null,"type":"language","size_in_bn":4,"modalities":{"input":["text"],"output":["text"]},"context_window":262144,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["accounts/fireworks/models/qwen3-4b-instruct-2507","alibaba-qwen3-4b-instruct","fireworks_ai/accounts/fireworks/models/qwen3-4b-instruct-2507","huggingface-reasoning-qwen3-4b-instruct-2507","lemonade/Qwen3-4B-Instruct-2507-GGUF","qwen3-4b-2507-instruct","qwen3-4b-2507-instruct-reasoning","qwen3-4b-instruct","qwen3-4b-instruct-reasoning"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"alibaba-qwen3-4b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.01,"max_input_per_1m":0.2,"min_output_per_1m":0.03,"max_output_per_1m":0.2,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface"],"provider_count":2},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-v2-5","name":"deepseek-v2-5","display_name":"DeepSeek V2.5","description":"An upgraded DeepSeek model merging the capabilities of DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct into a single unified assistant.","creator":"deepseek","family":"deepseek-v2","tier":"","version":"2.5","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":32768,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["accounts/fireworks/models/deepseek-v2p5","deepseek-v2-5","deepseek-v2-5-sep-2024","fireworks_ai/accounts/fireworks/models/deepseek-v2p5"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"deepseek-v2-5","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":1.2,"max_input_per_1m":1.2,"min_output_per_1m":1.2,"max_output_per_1m":1.2,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["fireworks_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-70b-instruct","name":"llama-3-1-70b-instruct","display_name":"Llama 3.1 70B Instruct","description":"Meta's 70B instruction-tuned LLM with strong tool-use and multilingual capabilities, widely deployed across cloud regions for enterprise workloads.","creator":"meta","family":"llama","tier":"","version":"3-1","type":"language","size_in_bn":70,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-07-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":13,"ids":["accounts/fireworks/models/llama-v3p1-70b-instruct","accounts/fireworks/models/llama-v3p1-70b-instruct-1b","azure_ai/Meta-Llama-3.1-70B-Instruct","deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct","deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo","fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct","fireworks_ai/accounts/fireworks/models/llama-v3p1-70b-instruct-1b","friendliai/meta-llama-3.1-70b-instruct","hyperbol
ic/meta-llama/Meta-Llama-3.1-70B-Instruct","lambda_ai/llama3.1-70b-instruct-fp8","llama-3-1-instruct-70b","meta-llama-3-1-70b-instruct","meta-llama/llama-3.1-70b-instruct","meta-llama/Meta-Llama-3.1-70B-Instruct","meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo","meta-textgeneration-llama-3-1-70b-instruct","meta-textgenerationneuron-llama-3-1-70b-instruct","meta.llama3-1-70b-instruct-v1:0","meta.llama3-1-70b-instruct-v1:0:128k","nebius/meta-llama/Meta-Llama-3.1-70B-Instruct","oci/meta.llama-3.1-70b-instruct","ovhcloud/Meta-Llama-3_1-70B-Instruct","perplexity/llama-3.1-70b-instruct","together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo","us.meta.llama3-1-70b-instruct-v1:0","vertex_ai/meta/llama-3.1-70b-instruct-maas"],"hf_likes":907,"hf_downloads":737459,"hf_downloads_all_time":20735812,"hf_trending_score":0,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-3-1-70b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.12,"max_input_per_1m":2.68,"min_output_per_1m":0.3,"max_output_per_1m":3.54,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["hyperbolic","lambda"],"provider_count":13},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1-distill-llama-8b","name":"deepseek-r1-distill-llama-8b","display_name":"DeepSeek R1 Distill Llama 8B","description":"A compact 8B Llama-based model distilled from DeepSeek R1, delivering strong reasoning performance in a lightweight 
architecture.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":8,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/deepseek-r1-distill-llama-8b","deepseek-llm-r1-distill-llama-8b","deepseek-r1-distill-llama-8b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-llama-8b","nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"deepseek-r1-distill-llama-8b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.025,"max_input_per_1m":0.2,"min_output_per_1m":0.025,"max_output_per_1m":0.2,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-8b-instruct","name":"llama-3-1-8b-instruct","display_name":"Llama 3.1 8B Instruct","description":"Meta's 8B instruction-tuned LLM optimized for fast, cost-effective deployment across multiple cloud regions with strong instruction-following 
performance.","creator":"meta","family":"llama","tier":"","version":"3-1","type":"language","size_in_bn":8,"modalities":{"input":["image","text"],"output":["text"]},"context_window":200000,"max_output_tokens":128000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-07-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":21,"ids":["@cf/meta/llama-3.1-8b-instruct","accounts/fireworks/models/full-llama-v3p1-8b-instruct-8b-fp8","accounts/fireworks/models/full-llama-v3p1-8b-instruct-8b-fp8-amd","accounts/fireworks/models/llama-v3p1-8b-instruct","azure_ai/Meta-Llama-3.1-8B-Instruct","databricks/databricks-meta-llama-3-1-8b-instruct","deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct","deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct","friendliai/meta-llama-3.1-8b-instruct","groq/llama-3.1-8b-instant","hyperbolic/meta-llama/Meta-Llama-3.1-8B-Instruct","lambda_ai/llama3.1-8b-instruct","llama-3-1-instruct-8b","meta-llama-3-1-8b-instruct","meta-llama/llama-3.1-8b-instruct","meta-llama/Meta-Llama-3.1-8B-Instruct","meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","meta-textgeneration-llama-3-1-8b-instruct","meta-textgenerationneuron-llama-3-1-8b-instruct","meta.llama3-1-8b-instruct-v1:0","meta.llama3-1-8b-instruct-v1:0:128k","nebius/meta-llama/Meta-Llama-3.1-8B-Instruct","novita/meta-llama/llama-3.1-8b-instruct","nscale/meta-llama/Llama-3.1-8B-Instruct","oci/meta.llama-3.1-8b-instruct","ovhcloud/Llama-3.1-8B-Instr
uct","perplexity/llama-3.1-8b-instruct","sambanova/Meta-Llama-3.1-8B-Instruct","together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","us.meta.llama3-1-8b-instruct-v1:0","vertex_ai/meta/llama-3.1-8b-instruct-maas","wandb/meta-llama/Llama-3.1-8B-Instruct"],"hf_likes":5731,"hf_downloads":9306502,"hf_downloads_all_time":140394735,"hf_trending_score":24,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-3-1-8b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.02,"max_input_per_1m":22,"min_output_per_1m":0.03,"max_output_per_1m":22,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","nebius","novita","openrouter"],"provider_count":21},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-72b-instruct","name":"qwen2-72b-instruct","display_name":"Qwen2 72B Instruct","description":"A 72-billion-parameter instruction-tuned LLM from Alibaba's Qwen2 series, excelling at natural language understanding, summarization, and 
dialogue.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":72,"modalities":{"input":["text"],"output":["text"]},"context_window":32768,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["accounts/fireworks/models/qwen2-72b-instruct","alibaba-qwen2-72b-instruct","fireworks_ai/accounts/fireworks/models/qwen2-72b-instruct","qwen2-72b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen2-72b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.9,"max_input_per_1m":0.9,"min_output_per_1m":0.9,"max_output_per_1m":0.9,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["fireworks_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"nvidia-nemotron-nano-2-12b-vl","name":"nvidia-nemotron-nano-2-12b-vl","display_name":"Nemotron Nano 2 12B VL","description":"A 12B-parameter vision-language model from NVIDIA's Nemotron Nano v2 series, enabling multi-image reasoning, video understanding, and document 
intelligence.","creator":"nvidia","family":"nemotronh_nano_vl_v2","tier":"","version":"2","type":"language","size_in_bn":12,"modalities":{"input":["image","text","video"],"output":["text"]},"context_window":131072,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-10-28","earliest_deprecation_date":"2026-05-07","deprecated":false,"has_pricing":true,"provider_count":2,"ids":["accounts/fireworks/models/nemotron-nano-v2-12b-vl","fireworks_ai/accounts/fireworks/models/nemotron-nano-v2-12b-vl","nvidia-nemotron-nano-12b-v2-vl","nvidia-nemotron-nano-12b-v2-vl-reasoning","nvidia-nemotron-nano-2-12b-vl","nvidia/nemotron-nano-12b-v2-vl","nvidia/nemotron-nano-12b-v2-vl:free","nvidia/NVIDIA-Nemotron-Nano-12B-v2-VL","publishers/nvidia/models/nemotron-nano-12b-v2-vl"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"nvidia-nemotron-nano-2-12b-vl","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.2,"min_output_per_1m":0.1,"max_output_per_1m":0.6,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["fireworks_ai"],"provider_count":2},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-5-coder-7b-instruct","name":"qwen2-5-coder-7b-instruct","display_name":"Qwen2.5 Coder 7B Instruct","description":"A 
7-billion-parameter instruction-tuned code LLM from Alibaba's Qwen2.5-Coder series, designed for responsive code generation and developer assistance.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":7,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["accounts/fireworks/models/qwen2p5-coder-7b-instruct","alibaba-qwen2-5-coder-7b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-7b-instruct","huggingface-llm-qwen2-5-coder-7b-instruct","nscale/Qwen/Qwen2.5-Coder-7B-Instruct","qwen2-5-coder-7b-instruct","qwen2.5-coder-7b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"alibaba-qwen2-5-coder-7b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.01,"max_input_per_1m":0.2,"min_output_per_1m":0.03,"max_output_per_1m":0.287,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","nscale"],"provider_count":4},"providers":[],"regions":[],"region_info":{}}},{"id":"mistral-mixtral-8x22b-instruct","name":"mistral-mixtral-8x22b-instruct","display_name":"Mixtral 8x22B Instruct","description":"The 
instruction-tuned version of Mistral AI's Mixtral 8x22B MoE model, optimized for following complex instructions and multi-turn dialogue.","creator":"mistral","family":"mixtral","tier":"","version":null,"type":"language","size_in_bn":22,"modalities":{"input":["text"],"output":["text"]},"context_window":65536,"max_output_tokens":2048,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-01-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Mistral","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-04-17","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["accounts/fireworks/models/mixtral-8x22b-instruct","anyscale/mistralai/Mixtral-8x22B-Instruct-v0.1","fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct","fireworks_ai/accounts/fireworks/models/mixtral-8x22b-instruct-hf","huggingface-llm-mistralai-mixtral-8x22B-instruct-v0-1","mistral-8x22b-instruct","mistral-mixtral-8x22b-instruct","mistral/mixtral-8x22b-instruct","mistralai/mixtral-8x22b-instruct","nscale/mistralai/mixtral-8x22b-instruct-v0.1","ollama/mixtral-8x22B-Instruct-v0.1","openrouter/mistralai/mixtral-8x22b-instruct","vercel_ai_gateway/mistral/mixtral-8x22b-instruct"],"hf_likes":748,"hf_downloads":28279,"hf_downloads_all_time":6052030,"hf_trending_score":0,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"mistral-mixtral-8x22b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.6,"max_input_per_1m":2,"min_output_per_1m":0.6,"max_output_per_1m":6,"min_cache_read_per_1m":0.2,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-2-7b-chat","name":"llama-2-7b-chat","display_name":"Llama 2 7B Chat","description":"A 7B Llama 2 model fine-tuned with RLHF for dialogue use cases, offering an efficient and accessible conversational LLM.","creator":"meta","family":"llama","tier":"","version":"2","type":"language","size_in_bn":7,"modalities":{"input":["text"],"output":["text"]},"context_window":4096,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["@cf/meta/llama-2-7b-chat-fp16","accounts/fireworks/models/llama-v2-7b-chat","anyscale/meta-llama/Llama-2-7b-chat-hf","cloudflare/@cf/meta/llama-2-7b-chat-fp16","cloudflare/@cf/meta/llama-2-7b-chat-int8","fireworks_ai/accounts/fireworks/models/llama-v2-7b-chat","llama-2-chat-7b","meta-llama-2-7b-chat","replicate/meta/llama-2-7b-chat"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"meta-llama-2-7b-chat","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":0.556,"min_output_per_1m":0.15,"max_output_per_1m":6.667,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["replicate"],"provider_count":4},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1-distill-qwen-1-5b","name":"deepseek-r1-distill-qwen-1-5b","display_name":"DeepSeek R1 Distill Qwen 1.5B","description":"A 1.5B Qwen-based model distilled from DeepSeek R1's reasoning chains, offering chain-of-thought capabilities in an extremely compact form factor.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":1.5,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b","deepseek-llm-r1-distill-qwen-1-5b","deepseek-r1-distill-qwen-1-5b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-1p5b","nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 
08:02:30","pricing":{"model_id":"deepseek-r1-distill-qwen-1-5b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.09,"max_input_per_1m":0.1,"min_output_per_1m":0.09,"max_output_per_1m":0.1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-70b-instruct","name":"llama-3-70b-instruct","display_name":"Llama 3 70B Instruct","description":"Meta's 70B instruction-tuned LLM from the Llama 3 generation, widely used for enterprise conversational AI and complex instruction-following tasks.","creator":"meta","family":"llama","tier":"","version":"3","type":"language","size_in_bn":70,"modalities":{"input":["pdf","text"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-04-18","earliest_deprecation_date":"2026-06-19","deprecated":false,"has_pricing":true,"provider_count":9,"ids":["accounts/fireworks/models/llama-v3-70b-instruct","accounts/fireworks/models/llama-v3-70b-instruct-hf","accounts/fireworks/models/llama-v3-70b-instruct-v2","anyscale/meta-llama/Meta-Llama-3-70B-Instruct","azure_ai/Meta-Llama-3-70B-Instruct","bedrock/ap-south-1/meta.llama3-70b-instruct-v1:0","bedrock/ca-central-1/meta.llama3-70b-instruct-v1:0","bedrock/eu-west-1/meta.llama3-70b-instruct-v1:0","bedrock/eu-west-2/meta.llama3-70b
-instruct-v1:0","bedrock/sa-east-1/meta.llama3-70b-instruct-v1:0","bedrock/us-east-1/meta.llama3-70b-instruct-v1:0","bedrock/us-gov-east-1/meta.llama3-70b-instruct-v1:0","bedrock/us-gov-west-1/meta.llama3-70b-instruct-v1:0","bedrock/us-west-1/meta.llama3-70b-instruct-v1:0","databricks/databricks-meta-llama-3-70b-instruct","fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct","fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct-hf","hyperbolic/meta-llama/Meta-Llama-3-70B-Instruct","llama-3-instruct-70b","meta-llama-3-70b-instruct","meta-llama/llama-3-70b-instruct","meta-textgeneration-llama-3-70b-instruct","meta-textgenerationneuron-llama-3-70b-instruct","meta.llama3-70b-instruct-v1:0","novita/meta-llama/llama-3-70b-instruct","openrouter/meta-llama/llama-3-70b-instruct","replicate/meta/llama-3-70b-instruct","vertex_ai/meta/llama3-70b-instruct-maas"],"hf_likes":1510,"hf_downloads":44220,"hf_downloads_all_time":5966217,"hf_trending_score":1,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-3-70b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.12,"max_input_per_1m":2.65,"min_output_per_1m":0.3,"max_output_per_1m":3.5,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["hyperbolic"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-2-13b-chat","name":"llama-2-13b-chat","display_name":"Llama 2 13B Chat","description":"A 13B Llama 2 model fine-tuned with RLHF for dialogue use cases, optimized for helpful and safe conversational 
interactions.","creator":"meta","family":"llama","tier":"","version":"2","type":"language","size_in_bn":13,"modalities":{"input":["text"],"output":["text"]},"context_window":4096,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["accounts/fireworks/models/llama-v2-13b-chat","anyscale/meta-llama/Llama-2-13b-chat-hf","fireworks_ai/accounts/fireworks/models/llama-v2-13b-chat","llama-2-chat-13b","meta-llama-2-13b-chat","meta.llama2-13b-chat-v1","replicate/meta/llama-2-13b-chat"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-2-13b-chat","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.75,"min_output_per_1m":0.2,"max_output_per_1m":1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["replicate"],"provider_count":4},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-2-70b-chat","name":"llama-2-70b-chat","display_name":"Llama 2 70B Chat","description":"A 70B Llama 2 model fine-tuned with RLHF for dialogue, providing high-quality conversational responses at the largest Llama 2 
scale.","creator":"meta","family":"llama","tier":"","version":"2","type":"language","size_in_bn":70,"modalities":{"input":["text"],"output":["text"]},"context_window":4096,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["anyscale/meta-llama/Llama-2-70b-chat-hf","databricks/databricks-llama-2-70b-chat","fireworks_ai/accounts/fireworks/models/llama-v2-70b-chat","llama-2-chat-70b","meta-llama-2-70b-chat","meta.llama2-70b-chat-v1","perplexity/llama-2-70b-chat","replicate/meta/llama-2-70b-chat","snowflake/llama2-70b-chat"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-13 08:02:30","pricing":{"model_id":"meta-llama-2-70b-chat","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-13","ingestion_date":"2026-06-13","summary":{"currency":"USD","min_input_per_1m":0.50001,"max_input_per_1m":1.95,"min_output_per_1m":0.9,"max_output_per_1m":2.8,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["databricks"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}}],"pagination":{"page_size":50,"has_next":true,"next_token":"NTA","total_count":220},"meta":{"updated_at":"2026-06-13","request_id":"eb7f3df5-d50b-4f81-8eda-ece867750373","execution_ms":11}}