{"data":{"id":"step-3-10b-vl","name":"step-3-10b-vl","display_name":"Step 3 VL 10B","description":"A 10B-parameter vision-language model from StepFun capable of understanding and reasoning over both images and text.","creator":"step","family":"step","tier":"","version":"3","type":"language","size_in_bn":10,"modalities":{"input":["text"],"output":["text"]},"context_window":null,"max_output_tokens":null,"tool_use_system_prompt_tokens":0,"output_vector_sizes":[],"knowledge_cutoff":null,"training_data_cutoff":null,"supported_reasoning_efforts":[],"tokenizer":null,"capabilities":{"function_calling":false,"parallel_function_calling":false,"structured_outputs":false,"prompt_caching":false,"reasoning":false,"web_search":false,"computer_use":false,"code_execution":false,"file_search":false,"url_context":false,"assistant_prefill":false,"native_structured_output":false,"adaptive_reasoning":false},"release_date":null,"earliest_deprecation_date":null,"deprecated":false,"has_pricing":false,"provider_count":0,"ids":["step-3-10b-vl","step-3-vl-10b"],"hf_likes":null,"hf_downloads":null,"hf_downloads_all_time":null,"hf_trending_score":null,"updated_at":"2026-06-23 08:02:26"},"meta":{"updated_at":"2026-06-23","request_id":"10c576c9-5597-4dcc-8f50-5a9b2b9bf82d","execution_ms":2}}