package openai

import (
	"encoding/json"
	"fmt"
	"net/http"
	"strings"
	"time"

	"github.com/songquanpeng/one-api/common/logger"
	"github.com/songquanpeng/one-api/relay/channeltype"
	"github.com/songquanpeng/one-api/relay/model"
)

// metadataHTTPClient is a shared client for metadata lookups; the timeout
// bounds the whole request so a slow upstream cannot stall billing.
var metadataHTTPClient = &http.Client{Timeout: 8 * time.Second}

// usageMetadataResponse is a generic structure for upstream metadata endpoints
// that return nativeTokens (e.g. Zenmux /api/v1/generation).
type usageMetadataResponse struct {
	NativeTokens struct {
		PromptTokenCount     int `json:"promptTokenCount"`
		CandidatesTokenCount int `json:"candidatesTokenCount"`
		TotalTokenCount      int `json:"totalTokenCount"`
		// ThoughtsTokenCount is billed at the completion rate, included in CompletionTokens.
		ThoughtsTokenCount int `json:"thoughtsTokenCount"`
	} `json:"nativeTokens"`
}

// FetchUsageFromMetadataURL fetches accurate token usage from an upstream metadata
// endpoint. urlTemplate must contain {id} which is replaced with generationId.
// Returns nil if the fetch fails or the response contains no usable token data.
func FetchUsageFromMetadataURL(urlTemplate, generationId string) (*model.Usage, error) { url := strings.ReplaceAll(urlTemplate, "{id}", generationId) resp, err := metadataHTTPClient.Get(url) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("metadata API returned status %d", resp.StatusCode) } var meta usageMetadataResponse if err := json.NewDecoder(resp.Body).Decode(&meta); err != nil { return nil, err } if meta.NativeTokens.TotalTokenCount == 0 { return nil, fmt.Errorf("metadata API returned zero total tokens") } prompt := meta.NativeTokens.PromptTokenCount completion := meta.NativeTokens.CandidatesTokenCount + meta.NativeTokens.ThoughtsTokenCount logger.SysLog(fmt.Sprintf("usage from metadata API (id=%s): prompt=%d completion=%d", generationId, prompt, completion)) return &model.Usage{ PromptTokens: prompt, CompletionTokens: completion, TotalTokens: prompt + completion, }, nil } // countOutputMediaTokens returns a fixed token estimate for any embedded // media data URIs found in text (image/video/audio), consistent with the // fixed estimates used for input media in CountTokenMessages. 
func countOutputMediaTokens(text string) int { tokens := 0 tokens += strings.Count(text, "data:image/") * 2500 tokens += strings.Count(text, "data:video/") * 10000 tokens += strings.Count(text, "data:audio/") * 1500 return tokens } func ResponseText2Usage(responseText string, modelName string, promptTokens int) *model.Usage { usage := &model.Usage{} usage.PromptTokens = promptTokens usage.CompletionTokens = CountTokenText(stripBase64Payloads(responseText), modelName) + countOutputMediaTokens(responseText) usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens return usage } func GetFullRequestURL(baseURL string, requestURL string, channelType int) string { if channelType == channeltype.OpenAICompatible { return fmt.Sprintf("%s%s", strings.TrimSuffix(baseURL, "/"), strings.TrimPrefix(requestURL, "/v1")) } fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL) if strings.HasPrefix(baseURL, "https://gateway.ai.cloudflare.com") { switch channelType { case channeltype.OpenAI: fullRequestURL = fmt.Sprintf("%s%s", baseURL, strings.TrimPrefix(requestURL, "/v1")) case channeltype.Azure: fullRequestURL = fmt.Sprintf("%s%s", baseURL, strings.TrimPrefix(requestURL, "/openai/deployments")) } } return fullRequestURL }