{"data":[{"id":"alibaba-qwen2-5-vl-32b-instruct","name":"qwen2-5-vl-32b-instruct","display_name":"Qwen2.5 VL 32B Instruct","description":"A 32-billion-parameter multimodal vision-language LLM from Alibaba's Qwen2.5-VL series, capable of understanding and reasoning over both images and text.","creator":"alibaba","family":"qwen2_5_vl","tier":"","version":null,"type":"language","size_in_bn":32,"modalities":{"input":["image"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":3,"ids":["accounts/fireworks/models/qwen2p5-vl-32b-instruct","alibaba-qwen2-5-vl-32b-instruct","deepinfra/Qwen/Qwen2.5-VL-32B-Instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-vl-32b-instruct","qwen2.5-vl-32b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-18 08:02:51"},{"id":"alibaba-qwen2-5-vl-3b-instruct","name":"qwen2-5-vl-3b-instruct","display_name":"Qwen2.5 VL 3B Instruct","description":"A compact 3-billion-parameter multimodal vision-language LLM from Alibaba's Qwen2.5-VL series, suited for image-text tasks in resource-constrained settings.","creator":"alibaba","family":"qwen2_5_vl","tier":"","version":null,"type":"language","size_in_bn":3,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["accounts/fireworks/models/qwen2p5-vl-3b-instruct","alibaba-qwen2-5-vl-3b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-vl-3b-instruct","qwen2.5-vl-3b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-18 08:02:51"},{"id":"alibaba-qwen2-5-vl-72b-instruct","name":"qwen2-5-vl-72b-instruct","display_name":"Qwen2.5 VL 72B Instruct","description":"A 72-billion-parameter multimodal vision-language LLM from Alibaba's Qwen2.5-VL series, delivering high-capacity image understanding and visual reasoning.","creator":"alibaba","family":"qwen2_5_vl","tier":"","version":null,"type":"language","size_in_bn":72,"modalities":{"input":["image","text"],"output":["text"]},"context_window":131072,"max_output_tokens":128000,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":"2024-06-30","training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":"Qwen","capabilities":{"function_calling":true,"parallel_function_calling":false,"structured_outputs":true,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":true,"adaptive_reasoning":false},"release_date":"2025-02-01","earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":7,"ids":["accounts/fireworks/models/qwen2p5-vl-72b-instruct","alibaba-qwen2-5-vl-72b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-vl-72b-instruct","nebius/Qwen/Qwen2.5-VL-72B-Instruct","novita/qwen/qwen2.5-vl-72b-instruct","ovhcloud/Qwen2.5-VL-72B-Instruct","qwen/qwen2.5-vl-72b-instruct","qwen2.5-vl-72b-instruct"],"hf_likes":609,"hf_downloads":103451,"hf_downloads_all_time":5812114,"hf_trending_score":1,"updated_at":"2026-06-18 08:02:51"},{"id":"alibaba-qwen2-5-vl-7b-instruct","name":"qwen2-5-vl-7b-instruct","display_name":"Qwen2.5 VL 7B Instruct","description":"A 7-billion-parameter multimodal vision-language LLM from Alibaba's Qwen2.5-VL series, enabling efficient image-text understanding and generation.","creator":"alibaba","family":"qwen2_5_vl","tier":"","version":null,"type":"language","size_in_bn":7,"modalities":{"input":["text"],"output":["text"]},"context_window":131072,"max_output_tokens":8192,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":2,"ids":["accounts/fireworks/models/qwen2p5-vl-7b-instruct","alibaba-qwen2-5-vl-7b-instruct","fireworks_ai/accounts/fireworks/models/qwen2p5-vl-7b-instruct","qwen2.5-vl-7b-instruct"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-18 08:02:51"},{"id":"rolm-ocr","name":"rolm-ocr","display_name":"Rolm OCR","description":"An open-source document OCR model built on Qwen2.5-VL-7B-Instruct by Reducto AI, offering faster performance and reduced memory usage as a drop-in alternative to olmOCR.","creator":"rolm","family":"qwen2_5_vl","tier":"","version":null,"type":"image-to-text","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":128000,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":true,"provider_count":1,"ids":["accounts/fireworks/models/rolm-ocr","fireworks_ai/accounts/fireworks/models/rolm-ocr","rolm-ocr"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-18 08:02:51"},{"id":"voyage-multimodal-3-5","name":"voyage-multimodal-3-5","display_name":"Voyage Multimodal 3.5","description":"A multimodal embedding model built for retrieval over interleaved text and images—including screenshots, PDFs, tables, and figures—for cross-modal search applications.","creator":"voyage","family":"qwen2_5_vl","tier":"","version":"3-5","type":"language","size_in_bn":null,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":false,"provider_count":0,"ids":["accounts/fireworks/models/voyage-multimodal-3-5","voyage-multimodal-3-5"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-18 08:02:51"}],"pagination":{"page_size":50,"has_next":false,"next_token":null,"total_count":6},"meta":{"updated_at":"2026-06-18","request_id":"bd728cd9-1058-4208-93bd-7d4bf2fd6626","execution_ms":2}}