{"data":[{"id":"moonshot-kimi-k2-5","name":"kimi-k2-5","display_name":"Kimi K2.5","description":"An updated iteration of Kimi K2 with enhanced reasoning, vision, and tool-use capabilities, supporting implicit caching for efficient inference.","creator":"moonshot","family":"kimi_k25","tier":"","version":"k2-5","type":"language","size_in_bn":1058.589,"modalities":{"input":["image","text","video"],"output":["text"]},"context_window":262144,"max_output_tokens":98304,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Other","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2026-01-27","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":12,"ids":["@cf/moonshotai/kimi-k2.5","accounts/fireworks/models/kimi-k2p5","azure_ai/kimi-k2.5","baseten/moonshotai/Kimi-K2.5","bedrock/ap-northeast-1/moonshotai.kimi-k2.5","bedrock/ap-south-1/moonshotai.kimi-k2.5","bedrock/ap-southeast-3/moonshotai.kimi-k2.5","bedrock/eu-north-1/moonshotai.kimi-k2.5","bedrock/moonshotai.kimi-k2.5","bedrock/sa-east-1/moonshotai.kimi-k2.5","bedrock/us-east-1/moonshotai.kimi-k2.5","bedrock/us-east-2/moonshotai.kimi-k2.5","bedrock/us-west-2/moonshotai.kimi-k2.5","fireworks_ai/accounts/fireworks/models/kimi-k2p5","fireworks_ai/kimi-k2p5","huggingface-llm-kimi-k2-5","kimi-k2-5","kimi-k2-5-non-reasoning","kimi-k2.5","moonshot-kimi-k2-5","moonshot/kimi-k2.5","moonshotai.kimi-k2.5","moonshotai/kimi-k2.5","moonshotai/Kimi-K2.5","openrouter/moonshotai/kimi-k2.5","together_ai/moonshotai/Kimi-K2.5","wandb/moonshotai/Kimi-K2.5"],"hf_likes":2753,"hf_downloads":5222216,"hf_downloads_al
l_time":9851195,"hf_trending_score":34,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"moonshot-kimi-k2-5","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.35,"max_input_per_1m":0.6,"min_output_per_1m":1.89,"max_output_per_1m":3.011,"min_cache_read_per_1m":0.1,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":12},"providers":[],"regions":[],"region_info":{}}},{"id":"openai-gpt-oss-120b","name":"gpt-oss-120b","display_name":"GPT OSS 120B","description":"A 120-billion-parameter open-weights GPT model from OpenAI designed for reasoning-intensive tasks with implicit caching support.","creator":"openai","family":"gpt_oss","tier":"","version":null,"type":"language","size_in_bn":120,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"GPT","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":true,"reasoning":true,"web_search":true,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-08-05","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":21,"ids":["@cf/openai/gpt-oss-120b","accounts/fireworks/models/gpt-oss-120b","azure_ai/gpt-oss-120b","baseten/openai/gpt-oss-120b","bedrock_mantle/openai.gpt-oss-120b","cerebras/gpt-oss-120b","crusoe/openai/gpt-oss-120b","databricks/databricks-gpt-oss-120b","deepinfra/openai/gpt-oss-120b","fireworks_ai/accounts/fireworks/models/gpt-oss-120b","gpt-oss-120b","gpt-oss-120b-low","gpt-oss-120b-maas","groq/openai/gpt-oss
-120b","lemonade/gpt-oss-120b-mxfp-GGUF","novita/openai/gpt-oss-120b","ollama/gpt-oss:120b-cloud","openai-gpt-oss-120b","openai-reasoning-gpt-oss-120b","openai.gpt-oss-120b-1:0","openai/gpt-oss-120b","openai/gpt-oss-120b:free","openrouter/openai/gpt-oss-120b","ovhcloud/gpt-oss-120b","publishers/google/models/gpt-oss-120b-maas","replicate/openai/gpt-oss-120b","sambanova/gpt-oss-120b","together_ai/openai/gpt-oss-120b","vertex_ai/openai/gpt-oss-120b-maas","wandb/openai/gpt-oss-120b","watsonx/openai/gpt-oss-120b"],"hf_likes":4719,"hf_downloads":3524674,"hf_downloads_all_time":32348365,"hf_trending_score":25,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"openai-gpt-oss-120b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.039,"max_input_per_1m":15,"min_output_per_1m":0.18,"max_output_per_1m":60,"min_cache_read_per_1m":0.075,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":21},"providers":[],"regions":[],"region_info":{}}},{"id":"openai-gpt-oss-20b","name":"gpt-oss-20b","display_name":"GPT OSS 20B","description":"A 20-billion-parameter open-weights GPT model from OpenAI suited for reasoning and tool-use tasks at a smaller, more efficient 
scale.","creator":"openai","family":"gpt_oss","tier":"","version":null,"type":"language","size_in_bn":20,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"GPT","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":true,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-08-05","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":16,"ids":["@cf/openai/gpt-oss-20b","accounts/fireworks/models/gpt-oss-20b","bedrock_mantle/openai.gpt-oss-20b","databricks/databricks-gpt-oss-20b","deepinfra/openai/gpt-oss-20b","fireworks_ai/accounts/fireworks/models/gpt-oss-20b","gpt-oss-20b","gpt-oss-20b-low","gpt-oss-20b-maas","groq/openai/gpt-oss-20b","lemonade/gpt-oss-20b-mxfp4-GGUF","novita/openai/gpt-oss-20b","ollama/gpt-oss:20b-cloud","openai-gpt-oss-20b","openai-reasoning-gpt-oss-20b","openai.gpt-oss-20b-1:0","openai/gpt-oss-20b","openai/gpt-oss-20b:free","openrouter/openai/gpt-oss-20b","ovhcloud/gpt-oss-20b","publishers/google/models/gpt-oss-20b-maas","replicate/openai/gpt-oss-20b","together_ai/openai/gpt-oss-20b","vertex_ai/openai/gpt-oss-20b-maas","wandb/openai/gpt-oss-20b"],"hf_likes":4552,"hf_downloads":6455272,"hf_downloads_all_time":59707566,"hf_trending_score":12,"updated_at":"2026-06-11 
08:02:18","pricing":{"model_id":"openai-gpt-oss-20b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.029,"max_input_per_1m":5,"min_output_per_1m":0.14,"max_output_per_1m":20,"min_cache_read_per_1m":0.0375,"min_cache_write_per_1m":0.007,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":16},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwq-32b","name":"qwq-32b","display_name":"QwQ 32B","description":"A 32B reasoning-focused LLM from Alibaba's Qwen team, designed to match frontier closed-model performance on complex reasoning and problem-solving tasks.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-03-05","earliest_deprecation_date":"2026-04-29","deprecated":false,"has_pricing":true,"provider_count":9,"ids":["@cf/qwen/qwq-32b","accounts/fireworks/models/qwq-32b","alibaba-qwq-32b","deepinfra/Qwen/QwQ-32B","fireworks_ai/accounts/fireworks/models/qwq-32b","huggingface-llm-qwq-32b","hyperbolic/Qwen/QwQ-32B","nebius/Qwen/QwQ-32B","nscale/Qwen/QwQ-32B","qwen/qwq-32b","qwq-32b","sambanova/QwQ-32B"],"hf_likes":2906,"hf_downloads":73024,"hf_downloads_all_time":2875672,"hf_trending_score":1,"updated_at":"2026-06-11 
08:02:18","pricing":{"model_id":"alibaba-qwq-32b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.15,"max_input_per_1m":0.9,"min_output_per_1m":0.2,"max_output_per_1m":1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","nebius"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"deepseek-r1-distill-qwen-32b","name":"deepseek-r1-distill-qwen-32b","display_name":"DeepSeek R1 Distill Qwen 32B","description":"A 32B Qwen-based model distilled from DeepSeek R1's reasoning capabilities, offering high-quality chain-of-thought performance at a mid-scale parameter count.","creator":"deepseek","family":"deepseek-r1","tier":"","version":"1.0","type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-07-31","training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-01-29","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["@cf/deepseek-ai/deepseek-r1-distill-qwen-32b","accounts/fireworks/models/deepseek-r1-distill-qwen-32b","deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B","deepseek-llm-r1-distill-qwen-32b","deepseek-r1-distill-qwen-32b","deepseek/deepseek-r1-distill-qwen-32b","fireworks_ai/accounts/fireworks/models/deepseek-r1-distill-qwen-32b","novita/deepseek/deepseek-r1-distill-qwen-32b","nscale/deepseek-a
i/DeepSeek-R1-Distill-Qwen-32B"],"hf_likes":1545,"hf_downloads":1046750,"hf_downloads_all_time":23929632,"hf_trending_score":3,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"deepseek-r1-distill-qwen-32b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.15,"max_input_per_1m":0.9,"min_output_per_1m":0.15,"max_output_per_1m":4.881,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nscale"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen2-5-coder-32b-instruct","name":"qwen2-5-coder-32b-instruct","display_name":"Qwen2.5 Coder 32B Instruct","description":"A 32-billion-parameter instruction-tuned code LLM from Alibaba's Qwen2.5-Coder series, excelling at code generation, debugging, and explanation across many programming languages.","creator":"alibaba","family":"qwen2","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":32768,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2024-11-11","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":9,"ids":["@cf/qwen/qwen2.5-coder-32b-instruct","accounts/fireworks/models/qwen2p5-coder-32b-instruct","accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k","accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope","accounts/firewor
ks/models/qwen2p5-coder-32b-instruct-64k","alibaba-qwen2-5-coder-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-128k","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-32k-rope","fireworks_ai/accounts/fireworks/models/qwen2p5-coder-32b-instruct-64k","huggingface-llm-qwen2-5-coder-32b-instruct","hyperbolic/Qwen/Qwen2.5-Coder-32B-Instruct","lambda_ai/qwen25-coder-32b-instruct","nscale/Qwen/Qwen2.5-Coder-32B-Instruct","openrouter/qwen/qwen-2.5-coder-32b-instruct","ovhcloud/Qwen2.5-Coder-32B-Instruct","qwen/qwen-2.5-coder-32b-instruct","qwen2-5-coder-32b-instruct","qwen2.5-coder-32b-instruct"],"hf_likes":2008,"hf_downloads":1257495,"hf_downloads_all_time":5998607,"hf_trending_score":0,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"alibaba-qwen2-5-coder-32b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":0.9,"min_output_per_1m":0.1,"max_output_per_1m":1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["lambda"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-8b-instruct","name":"llama-3-1-8b-instruct","display_name":"Llama 3.1 8B Instruct","description":"Meta's 8B instruction-tuned LLM optimized for fast, cost-effective deployment across multiple cloud regions with strong instruction-following 
performance.","creator":"meta","family":"llama","tier":"","version":"3-1","type":"language","size_in_bn":8,"modalities":{"input":["image","text"],"output":["text"]},"context_window":200000,"max_output_tokens":128000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2024-07-23","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":21,"ids":["@cf/meta/llama-3.1-8b-instruct","accounts/fireworks/models/full-llama-v3p1-8b-instruct-8b-fp8","accounts/fireworks/models/full-llama-v3p1-8b-instruct-8b-fp8-amd","accounts/fireworks/models/llama-v3p1-8b-instruct","azure_ai/Meta-Llama-3.1-8B-Instruct","databricks/databricks-meta-llama-3-1-8b-instruct","deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct","deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","fireworks_ai/accounts/fireworks/models/llama-v3p1-8b-instruct","friendliai/meta-llama-3.1-8b-instruct","groq/llama-3.1-8b-instant","hyperbolic/meta-llama/Meta-Llama-3.1-8B-Instruct","lambda_ai/llama3.1-8b-instruct","llama-3-1-instruct-8b","meta-llama-3-1-8b-instruct","meta-llama/llama-3.1-8b-instruct","meta-llama/Meta-Llama-3.1-8B-Instruct","meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","meta-textgeneration-llama-3-1-8b-instruct","meta-textgenerationneuron-llama-3-1-8b-instruct","meta.llama3-1-8b-instruct-v1:0","meta.llama3-1-8b-instruct-v1:0:128k","nebius/meta-llama/Meta-Llama-3.1-8B-Instruct","novita/meta-llama/llama-3.1-8b-instruct","nscale/meta-llama/Llama-3.1-8B-Instruct","oci/meta.llama-3.1-8b-instruct","ovhcloud/Llama-3.1-8B-Instr
uct","perplexity/llama-3.1-8b-instruct","sambanova/Meta-Llama-3.1-8B-Instruct","together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo","us.meta.llama3-1-8b-instruct-v1:0","vertex_ai/meta/llama-3.1-8b-instruct-maas","wandb/meta-llama/Llama-3.1-8B-Instruct"],"hf_likes":5731,"hf_downloads":9306502,"hf_downloads_all_time":140394735,"hf_trending_score":24,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-1-8b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.02,"max_input_per_1m":22,"min_output_per_1m":0.03,"max_output_per_1m":22,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["huggingface","nebius","novita","openrouter"],"provider_count":21},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-2-7b-chat","name":"llama-2-7b-chat","display_name":"Llama 2 7B Chat","description":"A 7B Llama 2 model fine-tuned with RLHF for dialogue use cases, offering an efficient and accessible conversational 
LLM.","creator":"meta","family":"llama","tier":"","version":"2","type":"language","size_in_bn":7,"modalities":{"input":["text"],"output":["text"]},"context_window":4096,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":4,"ids":["@cf/meta/llama-2-7b-chat-fp16","accounts/fireworks/models/llama-v2-7b-chat","anyscale/meta-llama/Llama-2-7b-chat-hf","cloudflare/@cf/meta/llama-2-7b-chat-fp16","cloudflare/@cf/meta/llama-2-7b-chat-int8","fireworks_ai/accounts/fireworks/models/llama-v2-7b-chat","llama-2-chat-7b","meta-llama-2-7b-chat","replicate/meta/llama-2-7b-chat"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-2-7b-chat","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":0.556,"min_output_per_1m":0.15,"max_output_per_1m":6.667,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["replicate"],"provider_count":4},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-8b-instruct","name":"llama-3-8b-instruct","display_name":"Llama 3 8B Instruct","description":"Meta's 8B instruction-tuned LLM from the Llama 3 generation, offering fast and cost-effective instruction-following across diverse 
tasks.","creator":"meta","family":"llama","tier":"","version":"3","type":"language","size_in_bn":8,"modalities":{"input":["pdf","text"],"output":["text"]},"context_window":32000,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2024-04-18","earliest_deprecation_date":"2026-06-19","deprecated":false,"has_pricing":true,"provider_count":9,"ids":["@cf/meta/llama-3-8b-instruct","accounts/fireworks/models/llama-v3-8b-instruct","accounts/fireworks/models/llama-v3-8b-instruct-hf","accounts/fireworks/models/llama-v3-8b-instruct-v0","anyscale/meta-llama/Meta-Llama-3-8B-Instruct","bedrock/ap-south-1/meta.llama3-8b-instruct-v1:0","bedrock/ca-central-1/meta.llama3-8b-instruct-v1:0","bedrock/eu-west-1/meta.llama3-8b-instruct-v1:0","bedrock/eu-west-2/meta.llama3-8b-instruct-v1:0","bedrock/sa-east-1/meta.llama3-8b-instruct-v1:0","bedrock/us-east-1/meta.llama3-8b-instruct-v1:0","bedrock/us-gov-east-1/meta.llama3-8b-instruct-v1:0","bedrock/us-gov-west-1/meta.llama3-8b-instruct-v1:0","bedrock/us-west-1/meta.llama3-8b-instruct-v1:0","deepinfra/meta-llama/Meta-Llama-3-8B-Instruct","fireworks_ai/accounts/fireworks/models/llama-v3-8b-instruct-hf","gradient_ai/llama3-8b-instruct","huggingface-llm-gradientai-llama-3-8B-instruct-262k","huggingface-llm-llama-3-8b-instruct-gradient","llama-3-instruct-8b","meta-llama-3-8b-instruct","meta-llama/llama-3-8b-instruct","meta-llama/Meta-Llama-3-8B-Instruct","meta-textgeneration-llama-3-8b-instruct","meta-textgenerationneuron-llama-3-8b-instruct","meta.llama3-8b-ins
truct-v1:0","novita/meta-llama/llama-3-8b-instruct","replicate/meta/llama-3-8b-instruct","vertex_ai/meta/llama3-8b-instruct-maas"],"hf_likes":4486,"hf_downloads":1342402,"hf_downloads_all_time":40122839,"hf_trending_score":1.5,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-8b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.03,"max_input_per_1m":0.3,"min_output_per_1m":0.04,"max_output_per_1m":0.827,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra"],"provider_count":9},"providers":[],"regions":[],"region_info":{}}},{"id":"deepgram-aura-1","name":"aura-1","display_name":"Aura 1","description":"Context-aware text-to-speech model that applies natural pacing, expressiveness, and conversational fillers based on the semantic content of the input text.","creator":"deepgram","family":"aura","tier":"","version":null,"type":"text-to-speech","size_in_bn":null,"modalities":{"input":["text"],"output":["audio"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/deepgram/aura-1","deepgram-aura-1"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 
08:02:18"},{"id":"deepgram-aura-2-en","name":"aura-2-en","display_name":"Aura 2 EN","description":"Second-generation context-aware English TTS model with improved natural pacing, expressiveness, and filler generation for voice agent applications.","creator":"deepgram","family":"aura","tier":"","version":null,"type":"text-to-speech","size_in_bn":null,"modalities":{"input":["text"],"output":["audio"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/deepgram/aura-2-en","deepgram-aura-2-en"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"deepgram-aura-2-es","name":"aura-2-es","display_name":"Aura 2 ES","description":"Second-generation context-aware Spanish TTS model with natural pacing, expressiveness, and filler generation optimized for Spanish-language voice agent 
applications.","creator":"deepgram","family":"aura","tier":"","version":null,"type":"text-to-speech","size_in_bn":null,"modalities":{"input":["text"],"output":["audio"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/deepgram/aura-2-es","deepgram-aura-2-es"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"baai-bge-1-5-base-en","name":"bge-1-5-base-en","display_name":"BGE Base EN V1.5","description":"A base-size English text embedding model from BAAI's BGE series, optimized for sentence similarity and semantic search 
tasks.","creator":"baai","family":"embed","tier":"","version":"1-5","type":"embedding","size_in_bn":0.109,"modalities":{"input":["text"],"output":["embedding"]},"context_window":153600,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[768],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["@cf/baai/bge-base-en-v1.5","baai-bge-1-5-base-en","BAAI/bge-base-en-v1.5","huggingface-sentencesimilarity-bge-base-en-v1-5","huggingface-textembedding-bge-base-en-v1-5","together_ai/baai/bge-base-en-v1.5","together_ai/BAAI/bge-base-en-v1.5"],"hf_likes":411,"hf_downloads":6725168,"hf_downloads_all_time":519748129,"hf_trending_score":1,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"baai-bge-1-5-base-en","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.008,"max_input_per_1m":0.067,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["together_ai"],"provider_count":2},"providers":[],"regions":[],"region_info":{}}},{"id":"baai-bge-1-5-large-en","name":"bge-1-5-large-en","display_name":"BGE Large EN V1.5","description":"A large-size English text embedding model from BAAI's BGE series, delivering high-quality sentence similarity 
representations.","creator":"baai","family":"bge","tier":"","version":"1-5","type":"embedding","size_in_bn":0.335,"modalities":{"input":["text"],"output":["embedding"]},"context_window":512,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[1024],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/baai/bge-large-en-v1.5","baai-bge-1-5-large-en","BAAI/bge-large-en-v1.5","huggingface-sentencesimilarity-bge-large-en-v1-5"],"hf_likes":653,"hf_downloads":11154544,"hf_downloads_all_time":91618478,"hf_trending_score":1,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"baai-bge-1-5-large-en","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.204,"max_input_per_1m":0.204,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"baai-bge-reranker-base","name":"bge-reranker-base","display_name":"BGE Reranker Base","description":"Cross-encoder reranker model that takes a query and passage as input and directly outputs a relevance similarity score, designed for retrieval re-ranking 
pipelines.","creator":"baai","family":"bge","tier":"","version":null,"type":"reranking","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/baai/bge-reranker-base","baai-bge-reranker-base"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"baai-bge-reranker-base","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.003,"max_input_per_1m":0.003,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"baai-bge-1-5-small-en","name":"bge-1-5-small-en","display_name":"BGE Small EN V1.5","description":"A compact English text embedding model from BAAI's BGE series, balancing efficiency and accuracy for sentence similarity 
tasks.","creator":"baai","family":"bge","tier":"","version":"1-5","type":"embedding","size_in_bn":null,"modalities":{"input":["text"],"output":["embedding"]},"context_window":512,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[384],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/baai/bge-small-en-v1.5","baai-bge-1-5-small-en","huggingface-sentencesimilarity-bge-small-en-v1-5"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"baai-bge-1-5-small-en","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.02,"max_input_per_1m":0.02,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"baai-bge-m3","name":"bge-m3","display_name":"BGE-M3","description":"A multilingual, multi-functionality, multi-granularity text embedding model supporting dense, sparse, and multi-vector 
retrieval.","creator":"baai","family":"embed","tier":"","version":null,"type":"embedding","size_in_bn":null,"modalities":{"input":["text"],"output":["embedding"]},"context_window":60000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["@cf/baai/bge-m3","baai-bge-m3","BAAI/bge-m3","huggingface-sentencesimilarity-bge-m3","novita/baai/bge-m3"],"hf_likes":2931,"hf_downloads":16521768,"hf_downloads_all_time":132322565,"hf_trending_score":17,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"baai-bge-m3","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.01,"max_input_per_1m":0.012,"min_output_per_1m":0.01,"max_output_per_1m":0.01,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["novita"],"provider_count":2},"providers":[],"regions":[],"region_info":{}}},{"id":"huggingface-distilbert-sst-2-int8","name":"distilbert-sst-2-int8","display_name":"DistilBERT SST-2 INT8","description":"INT8-quantized DistilBERT model fine-tuned on the SST-2 dataset for efficient binary sentiment 
classification.","creator":"huggingface","family":"distilbert","tier":"","version":null,"type":"classification","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/huggingface/distilbert-sst-2-int8","huggingface-distilbert-sst-2-int8"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"huggingface-distilbert-sst-2-int8","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.026,"max_input_per_1m":0.026,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"deepgram-flux","name":"flux","display_name":"Flux","description":"Conversational speech recognition model built specifically for real-time voice agent applications, prioritizing low-latency and turn-aware 
transcription.","creator":"deepgram","family":"deepgram","tier":"","version":null,"type":"speech-to-text","size_in_bn":null,"modalities":{"input":["audio"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/deepgram/flux","deepgram-flux"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"black-forest-labs-flux-2-dev","name":"flux-2-dev","display_name":"FLUX 2 Dev","description":"The second-generation development-tier text-to-image model from Black Forest Labs, building on the FLUX rectified flow transformer 
architecture.","creator":"black-forest-labs","family":"flux","tier":"","version":"2","type":"image-generation","size_in_bn":null,"modalities":{"input":["text"],"output":["image"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/black-forest-labs/flux-2-dev","black-forest-labs-flux-2-dev","black-forest-labs/FLUX-2-dev","fal_flux-2-dev-flash","fal_flux-2-dev-turbo","flux_flux-2--dev"],"hf_likes":1560,"hf_downloads":201991,"hf_downloads_all_time":1715287,"hf_trending_score":29,"updated_at":"2026-06-11 08:02:18"},{"id":"black-forest-labs-flux-klein-2-4b","name":"flux-klein-2-4b","display_name":"FLUX Klein 2 4B","description":"A compact 4B-parameter FLUX 2 model that unifies image generation and editing in a single fast model optimized for end-to-end 
inference.","creator":"black-forest-labs","family":"flux","tier":"","version":"2","type":"image-generation","size_in_bn":4,"modalities":{"input":["text"],"output":["image"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/black-forest-labs/flux-2-klein-4b","bfl/flux-2-klein-4b","black-forest-labs-flux-klein-2-4b","black-forest-labs/FLUX-2-klein-4b","flux_flux-2-klein-4b","flux_flux-2-klein-base-4b"],"hf_likes":628,"hf_downloads":244192,"hf_downloads_all_time":749236,"hf_trending_score":9,"updated_at":"2026-06-11 08:02:18"},{"id":"black-forest-labs-flux-klein-2-9b","name":"flux-klein-2-9b","display_name":"FLUX Klein 2 9B","description":"A 9B-parameter FLUX 2 Klein model combining fast inference with unified image generation and editing 
capabilities.","creator":"black-forest-labs","family":"flux","tier":"","version":"2","type":"image-generation","size_in_bn":9,"modalities":{"input":["text"],"output":["image"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/black-forest-labs/flux-2-klein-9b","bfl/flux-2-klein-9b","black-forest-labs-flux-klein-2-9b","black-forest-labs/FLUX-2-klein-9b","flux_flux-2-klein-9b","flux_flux-2-klein-base-9b"],"hf_likes":702,"hf_downloads":136775,"hf_downloads_all_time":436571,"hf_trending_score":20,"updated_at":"2026-06-11 08:02:18"},{"id":"black-forest-labs-flux-1-schnell","name":"flux-1-schnell","display_name":"FLUX.1 Schnell","description":"A fast, distilled 12B parameter text-to-image model from Black Forest Labs optimized for rapid image generation with minimal inference 
steps.","creator":"black-forest-labs","family":"image-gen","tier":"","version":"1","type":"image-generation","size_in_bn":null,"modalities":{"input":["text"],"output":["image"]},"context_window":4096,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["@cf/black-forest-labs/flux-1-schnell","accounts/fireworks/models/flux-1-schnell","black-forest-labs-flux-1-schnell","black-forest-labs/FLUX-1-schnell","fireworks_ai/accounts/fireworks/models/flux-1-schnell","flux-1-schnell","huggingface-txt2img-black-forest-labs-flux-1-schnell","nscale/black-forest-labs/FLUX.1-schnell"],"hf_likes":4794,"hf_downloads":724215,"hf_downloads_all_time":20529902,"hf_trending_score":17,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"black-forest-labs-flux-1-schnell","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.1,"min_output_per_1m":0.1,"max_output_per_1m":0.1,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["fireworks_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"google-gemma-3-12b-instruct","name":"gemma-3-12b-instruct","display_name":"Gemma 3 12B Instruct","description":"An instruction-tuned 12B Gemma 3 LLM supporting vision-language inputs and 128k 
context.","creator":"google","family":"gemma3_text","tier":"","version":"3","type":"language","size_in_bn":12,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-08-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Gemini","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-03-13","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["@cf/google/gemma-3-12b-it","accounts/fireworks/models/gemma-3-12b-it","crusoe/google/gemma-3-12b-it","deepinfra/google/gemma-3-12b-it","google-gemma-3-12b-instruct","google.gemma-3-12b-it","google/gemma-3-12b-it","google/gemma-3-12b-it:free","novita/google/gemma-3-12b-it"],"hf_likes":707,"hf_downloads":2516014,"hf_downloads_all_time":14080610,"hf_trending_score":2,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"google-gemma-3-12b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":0.345,"min_output_per_1m":0.1,"max_output_per_1m":0.556,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["deepinfra","novita","openrouter"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"google-gemma-4-26b-a4b-instruct","name":"gemma-4-26b-a4b-instruct","display_name":"Gemma 4 26B A4B IT","description":"Instruction-tuned MoE variant of Gemma 4 with 26B total and 4B active parameters, supporting vision, tool use, and file 
input.","creator":"google","family":"gemma4","tier":"","version":"4","type":"language","size_in_bn":26,"modalities":{"input":["image","pdf","text","video"],"output":["text"]},"context_window":262144,"max_output_tokens":131072,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":"Gemma","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2026-04-03","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":5,"ids":["@cf/google/gemma-4-26b-a4b-it","accounts/fireworks/models/gemma-4-26b-a4b-it","gemma-4-26b-a4b-it-maas","google-gemma-4-26b-a4b-instruct","google/gemma-4-26b-a4b-it","google/gemma-4-26B-A4B-it","google/gemma-4-26b-a4b-it:free","huggingface-vlm-gemma-4-26b-a4b-it","publishers/google/models/gemma-4-26b-a4b-it-maas","vertex_ai/google/gemma-4-26b-a4b-it-maas"],"hf_likes":751,"hf_downloads":3113602,"hf_downloads_all_time":3115675,"hf_trending_score":92,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"google-gemma-4-26b-a4b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.06,"max_input_per_1m":0.15,"min_output_per_1m":0.3,"max_output_per_1m":0.6,"min_cache_read_per_1m":0.015,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["openrouter"],"provider_count":5},"providers":[],"regions":[],"region_info":{}}},{"id":"z-ai-glm-4-7-flash","name":"glm-4-7-flash","display_name":"GLM-4.7 Flash","description":"Fast and efficient multilingual text generation model with a 131K token context window, 
optimized for dialogue, instruction-following, and multi-turn tool calling across 100+ languages.","creator":"z-ai","family":"glm","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/zai-org/glm-4.7-flash","z-ai-glm-4-7-flash"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"z-ai-glm-4-7-flash","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.06,"max_input_per_1m":0.06,"min_output_per_1m":0.4,"max_output_per_1m":0.4,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"ibm-granite-granite-4-0-h-micro","name":"granite-4-0-h-micro","display_name":"Granite 4.0 H Micro","description":"Compact Granite 4.0 micro-scale instruct model optimized for agentic tasks such as instruction following and function calling with high 
efficiency.","creator":"ibm-granite","family":"granite","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":131000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/ibm-granite/granite-4.0-h-micro","ibm-granite-granite-4-0-h-micro"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"ibm-granite-granite-4-0-h-micro","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.017,"max_input_per_1m":0.017,"min_output_per_1m":0.112,"max_output_per_1m":0.112,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"ai4bharat-indictrans2-en-indic-1b","name":"indictrans2-en-indic-1b","display_name":"IndicTrans2 EN Indic 1B","description":"Open-source multilingual neural machine translation model supporting high-quality translations across all 22 scheduled Indic languages, with a 1B parameter 
scale.","creator":"ai4bharat","family":"indictrans","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/ai4bharat/indictrans2-en-indic-1B","ai4bharat-indictrans2-en-indic-1b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"ai4bharat-indictrans2-en-indic-1b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.342,"max_input_per_1m":0.342,"min_output_per_1m":0.342,"max_output_per_1m":0.342,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"moonshotai-kimi-k2-6","name":"kimi-k2-6","display_name":"Kimi K2.6","description":"Frontier-scale open-source 1-trillion-parameter mixture-of-experts model with a 262K context window, vision inputs, and multi-turn tool calling for agentic 
workloads.","creator":"moonshotai","family":"kimi","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["image","text"],"output":["text"]},"context_window":262144,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/moonshotai/kimi-k2.6","moonshotai-kimi-k2-6"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"moonshotai-kimi-k2-6","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.95,"max_input_per_1m":0.95,"min_output_per_1m":4,"max_output_per_1m":4,"min_cache_read_per_1m":0.16,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"leonardo-lucid-origin","name":"lucid-origin","display_name":"Leonardo AI Lucid Origin","description":"Leonardo AI's highly prompt-responsive image generation model designed for versatility across graphic design, photorealistic renders, and highly specific creative 
outputs.","creator":"leonardo","family":"lucid","tier":"","version":null,"type":"image-generation","size_in_bn":null,"modalities":{"input":["text"],"output":["image"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/leonardo/lucid-origin","leonardo-lucid-origin"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"leonardo-phoenix-1-0","name":"phoenix-1-0","display_name":"Leonardo AI Phoenix 1.0","description":"Leonardo AI's image generation model emphasizing exceptional prompt adherence and coherent text rendering within generated 
images.","creator":"leonardo","family":"phoenix","tier":"","version":null,"type":"image-generation","size_in_bn":null,"modalities":{"input":["text"],"output":["image"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/leonardo/phoenix-1.0","leonardo-phoenix-1-0"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"meta-llama-3-8b-instruct-awq","name":"llama-3-8b-instruct-awq","display_name":"Llama 3 8B Instruct AWQ","description":"An AWQ-quantized build of Llama 3 8B Instruct enabling memory-efficient deployment of Meta's third-generation open LLM for chat and instruction 
tasks.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3-8b-instruct-awq","meta-llama-3-8b-instruct-awq"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-8b-instruct-awq","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.123,"max_input_per_1m":0.123,"min_output_per_1m":0.266,"max_output_per_1m":0.266,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-70b-instruct-fp8-fast","name":"llama-3-1-70b-instruct-fp8-fast","display_name":"Llama 3.1 70B Instruct FP8 Fast","description":"A quantized FP8 variant of Llama 3.1 70B optimized for faster inference throughput while preserving instruction-following 
capability.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3.1-70b-instruct-fp8-fast","meta-llama-3-1-70b-instruct-fp8-fast"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-1-70b-instruct-fp8-fast","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.293,"max_input_per_1m":0.293,"min_output_per_1m":2.253,"max_output_per_1m":2.253,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-8b-instruct-awq","name":"llama-3-1-8b-instruct-awq","display_name":"Llama 3.1 8B Instruct AWQ","description":"An AWQ-quantized version of Llama 3.1 8B Instruct designed for efficient deployment with reduced memory footprint on consumer and edge 
hardware.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3.1-8b-instruct-awq","meta-llama-3-1-8b-instruct-awq"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-1-8b-instruct-awq","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.123,"max_input_per_1m":0.123,"min_output_per_1m":0.266,"max_output_per_1m":0.266,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-8b-instruct-fp8","name":"llama-3-1-8b-instruct-fp8","display_name":"Llama 3.1 8B Instruct FP8","description":"Llama 3.1 8B instruction-tuned model quantized to FP8 precision for reduced memory footprint and faster inference with minimal accuracy 
loss.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":32000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3.1-8b-instruct-fp8","meta-llama-3-1-8b-instruct-fp8"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-1-8b-instruct-fp8","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.152,"max_input_per_1m":0.152,"min_output_per_1m":0.287,"max_output_per_1m":0.287,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-1-8b-instruct-fp8-fast","name":"llama-3-1-8b-instruct-fp8-fast","display_name":"Llama 3.1 8B Instruct FP8 Fast","description":"An FP8-quantized variant of Llama 3.1 8B Instruct tuned for high-speed inference with minimal accuracy loss on instruction-following 
tasks.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3.1-8b-instruct-fp8-fast","meta-llama-3-1-8b-instruct-fp8-fast"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-3-1-8b-instruct-fp8-fast","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.045,"max_input_per_1m":0.045,"min_output_per_1m":0.384,"max_output_per_1m":0.384,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-2-11b-vision-instruct","name":"meta-llama-3-2-11b-vision-instruct","display_name":"Llama 3.2 11B Vision Instruct","description":"Meta's 11B instruction-tuned vision-language model optimized for visual recognition, image reasoning, and captioning with multimodal input 
support.","creator":"meta","family":"mllama","tier":"","version":"3-2","type":"language","size_in_bn":11,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2024-09-25","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":8,"ids":["@cf/meta/llama-3.2-11b-vision-instruct","accounts/fireworks/models/llama-v3p2-11b-vision-instruct","azure_ai/Llama-3.2-11B-Vision-Instruct","deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct","fireworks_ai/accounts/fireworks/models/llama-v3p2-11b-vision-instruct","lambda_ai/llama3.2-11b-vision-instruct","meta-llama-3-2-11b-vision-instruct","meta-llama/llama-3.2-11b-vision-instruct","meta-llama/Llama-3.2-11B-Vision-Instruct","meta-vlm-llama-3-2-11b-vision-instruct","oci/meta.llama-3.2-11b-vision-instruct","watsonx/meta-llama/llama-3-2-11b-vision-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 
08:02:18","pricing":{"model_id":"meta-llama-3-2-11b-vision-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.015,"max_input_per_1m":2,"min_output_per_1m":0.025,"max_output_per_1m":2,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["lambda"],"provider_count":8},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-3-3-70b-instruct-fp8-fast","name":"llama-3-3-70b-instruct-fp8-fast","display_name":"Llama 3.3 70B Instruct FP8 Fast","description":"Llama 3.3 70B instruction-tuned model quantized to FP8 precision and further optimized for throughput-focused fast inference deployments.","creator":"meta","family":"llama","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":24000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/llama-3.3-70b-instruct-fp8-fast","meta-llama-3-3-70b-instruct-fp8-fast"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 
08:02:18","pricing":{"model_id":"meta-llama-3-3-70b-instruct-fp8-fast","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.293,"max_input_per_1m":0.293,"min_output_per_1m":2.253,"max_output_per_1m":2.253,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llama-4-17b-scout-instruct","name":"llama-4-17b-scout-instruct","display_name":"Llama 4 17B Scout Instruct","description":"Meta's Llama 4 Scout instruction-tuned MoE model with 17B active parameters, optimized for efficient multimodal inference with a lean expert configuration.","creator":"meta","family":"llama","tier":"","version":"4","type":"language","size_in_bn":17,"modalities":{"input":["image","text"],"output":["text"]},"context_window":10000000,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-08","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":12,"ids":["@cf/meta/llama-4-scout-17b-16e-instruct","azure_ai/Llama-4-Scout-17B-16E-Instruct","deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct","groq/meta-llama/llama-4-scout-17b-16e-instruct","lambda_ai/llama-4-scout-17b-16e-instruct","llama-4-scout-17b-16e-instruct-maas","meta-llama-4-17b-scout-instruct","meta-llama/llama-4-scout-17b-16e-instruct","meta-llama/Llama-4-Scout-17B-16E-Instruct","
novita/meta-llama/llama-4-scout-17b-16e-instruct","nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct","publishers/google/models/llama-4-scout-17b-16e-instruct-maas","sambanova/Llama-4-Scout-17B-16E-Instruct","together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct","vertex_ai/meta/llama-4-scout-17b-128e-instruct-maas","vertex_ai/meta/llama-4-scout-17b-16e-instruct-maas","wandb/meta-llama/Llama-4-Scout-17B-16E-Instruct"],"hf_likes":1272,"hf_downloads":390765,"hf_downloads_all_time":5421895,"hf_trending_score":0.5,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llama-4-17b-scout-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.05,"max_input_per_1m":17,"min_output_per_1m":0.1,"max_output_per_1m":66,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["lambda"],"provider_count":12},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-llamaguard-3-8b","name":"meta-llamaguard-3-8b","display_name":"LlamaGuard 3 8B","description":"An 8B parameter content safety model for classifying harmful content in LLM prompts and responses across multiple risk 
categories.","creator":"meta","family":"llama","tier":"","version":"3","type":"language","size_in_bn":8,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":16384,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-12-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Llama3","capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-02-12","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":6,"ids":["@cf/meta/llama-guard-3-8b","accounts/fireworks/models/llama-guard-3-8b","deepinfra/meta-llama/Llama-Guard-3-8B","fireworks_ai/accounts/fireworks/models/llama-guard-3-8b","meta-llama/llama-guard-3-8b","meta-llamaguard-3-8b","meta-textgeneration-llama-guard-3-8b","meta-textgenerationneuron-llama-guard-3-8b","nebius/meta-llama/Llama-Guard-3-8B","sambanova/Meta-Llama-Guard-3-8B"],"hf_likes":291,"hf_downloads":131512,"hf_downloads_all_time":4949411,"hf_trending_score":0.5,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-llamaguard-3-8b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.02,"max_input_per_1m":0.484,"min_output_per_1m":0.03,"max_output_per_1m":0.3,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["nebius"],"provider_count":6},"providers":[],"regions":[],"region_info":{}}},{"id":"meta-m2m100-1-2b","name":"m2m100-1-2b","display_name":"M2M100 1.2B","description":"Multilingual encoder-decoder sequence-to-sequence model trained for many-to-many direct translation across 100 
languages without pivoting through English.","creator":"meta","family":"m2m100","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/meta/m2m100-1.2b","meta-m2m100-1-2b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"meta-m2m100-1-2b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.342,"max_input_per_1m":0.342,"min_output_per_1m":0.342,"max_output_per_1m":0.342,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"myshell-ai-melotts","name":"melotts","display_name":"MeloTTS","description":"High-quality multilingual text-to-speech library supporting multiple languages with natural-sounding synthesis for voice 
applications.","creator":"myshell-ai","family":"tts","tier":"","version":null,"type":"text-to-speech","size_in_bn":null,"modalities":{"input":["text"],"output":["audio"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/myshell-ai/melotts","myshell-ai-melotts"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"mistral-small-3-1-24b-instruct","name":"mistral-small-3-1-24b-instruct","display_name":"Mistral Small 3.1 24B Instruct","description":"An instruction-tuned 24B-parameter multimodal LLM from Mistral, combining vision understanding with strong text generation 
performance.","creator":"mistral","family":"mistral3","tier":"","version":"3-1","type":"language","size_in_bn":24,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2023-10-31","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Mistral","capabilities":{"function_calling":true,"parallel_function_calling":true,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-03-17","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["@cf/mistralai/mistral-small-3.1-24b-instruct","mistral-small-3-1-24b-instruct","mistralai/mistral-small-3.1-24b-instruct","openrouter/mistralai/mistral-small-3.1-24b-instruct","watsonx/mistralai/mistral-small-3-1-24b-instruct-2503"],"hf_likes":1355,"hf_downloads":525643,"hf_downloads_all_time":2775582,"hf_trending_score":0,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"mistral-small-3-1-24b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.1,"max_input_per_1m":0.351,"min_output_per_1m":0.3,"max_output_per_1m":0.555,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["ibm_watsonx"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"nvidia-nemotron-3-120b-a12b","name":"nemotron-3-120b-a12b","display_name":"Nemotron Super 3 120B A12B","description":"Hybrid mixture-of-experts model with 120B total and 12B active parameters, designed for high-accuracy multi-agent and specialized agentic AI 
applications.","creator":"nvidia","family":"nemotron","tier":"","version":null,"type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":256000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":["default"],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":true,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/nvidia/nemotron-3-120b-a12b","nvidia-nemotron-3-120b-a12b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"nvidia-nemotron-3-120b-a12b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.5,"max_input_per_1m":0.5,"min_output_per_1m":1.5,"max_output_per_1m":1.5,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"deepgram-nova-3","name":"nova-3","display_name":"Nova 3","description":"Deepgram's third-generation Nova ASR model with improved accuracy and broader language support for general 
transcription.","creator":"deepgram","family":"nova","tier":"","version":"3","type":"speech-to-text","size_in_bn":null,"modalities":{"input":["audio"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["@cf/deepgram/nova-3","deepgram-nova-3","deepgram/nova-3"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"pfnet-plamo-embedding-1b","name":"plamo-embedding-1b","display_name":"PLaMo Embedding 1B","description":"1B-parameter Japanese text embedding model by Preferred Networks, converting Japanese text into dense numerical vectors for retrieval and semantic similarity 
tasks.","creator":"pfnet","family":"plamo","tier":"","version":null,"type":"embedding","size_in_bn":null,"modalities":{"input":["text"],"output":["embedding"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/pfnet/plamo-embedding-1b","pfnet-plamo-embedding-1b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"pfnet-plamo-embedding-1b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.019,"max_input_per_1m":0.019,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"alibaba-qwen3-embedding-0-6b","name":"qwen3-embedding-0-6b","display_name":"Qwen3 Embedding 0.6B","description":"A compact 0.6B-parameter text embedding model from the Qwen3 series, supporting retrieval, classification, clustering, and bitext mining 
tasks.","creator":"alibaba","family":"qwen3","tier":"","version":null,"type":"embedding","size_in_bn":0.6,"modalities":{"input":["text"],"output":["embedding"]},"context_window":32768,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":"2025-11-14","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["@cf/qwen/qwen3-embedding-0.6b","accounts/fireworks/models/qwen3-embedding-0p6b","alibaba-qwen3-embedding-0-6b","alibaba/qwen3-embedding-0.6b","fireworks_ai/accounts/fireworks/models/qwen3-embedding-0p6b","huggingface-textembedding-qwen3-embedding-0-6b","novita/qwen/qwen3-embedding-0.6b","Qwen/Qwen3-Embedding-0.6B"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"alibaba-qwen3-embedding-0-6b","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.01,"max_input_per_1m":0.07,"min_output_per_1m":null,"max_output_per_1m":null,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["vercel_ai_gateway"],"provider_count":3},"providers":[],"regions":[],"region_info":{}}},{"id":"microsoft-resnet-50","name":"resnet-50","display_name":"ResNet-50","description":"50-layer deep residual convolutional neural network for image classification, trained on over 1 million ImageNet images across 1,000 
categories.","creator":"microsoft","family":"resnet","tier":"","version":null,"type":"classification","size_in_bn":null,"modalities":{"input":["image"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/microsoft/resnet-50","microsoft-resnet-50"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"},{"id":"aisingapore-gemma-sea-lion-4-27b-instruct","name":"gemma-sea-lion-4-27b-instruct","display_name":"SEA-LION Gemma 4 27B Instruct","description":"A 27-billion-parameter instruction-tuned SEA-LION model built on Gemma 4, specialized for Southeast Asian languages and multilingual 
understanding.","creator":"aisingapore","family":"sea-lion","tier":"","version":"4","type":"language","size_in_bn":27,"modalities":{"input":["text"],"output":["text"]},"context_window":128000,"max_output_tokens":4096,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/aisingapore/gemma-sea-lion-v4-27b-it","aisingapore-gemma-sea-lion-4-27b-instruct","publicai/aisingapore/Gemma-SEA-LION-v4-27B-IT"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18","pricing":{"model_id":"aisingapore-gemma-sea-lion-4-27b-instruct","currency":"USD","exchange_rate":1,"exchange_rate_date":"2026-06-12","ingestion_date":"2026-06-11","summary":{"currency":"USD","min_input_per_1m":0.351,"max_input_per_1m":0.351,"min_output_per_1m":0.555,"max_output_per_1m":0.555,"min_cache_read_per_1m":null,"min_cache_write_per_1m":null,"min_reasoning_per_1m":null,"cheapest_providers":["cloudflare_workers_ai"],"provider_count":1},"providers":[],"regions":[],"region_info":{}}},{"id":"pipecat-ai-smart-turn-v2","name":"smart-turn-v2","display_name":"Smart Turn V2","description":"Open-source native audio turn detection model in its second version, designed for real-time voice agent conversation 
management.","creator":"pipecat-ai","family":"smart-turn","tier":"","version":null,"type":"audio","size_in_bn":null,"modalities":{"input":["audio"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["@cf/pipecat-ai/smart-turn-v2","pipecat-ai-smart-turn-v2"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-11 08:02:18"}],"pagination":{"page_size":50,"has_next":true,"next_token":"NTA","total_count":52},"meta":{"updated_at":"2026-06-11","request_id":"64b2e1ac-df67-46d3-9713-9ef7eabf048d","execution_ms":19}}