hjjjj 5730b2a798
Some checks failed
CI / Unit tests (push) Has been cancelled
CI / commit_lint (push) Has been cancelled
feat: 支持图片编辑功能并优化多模态计费
refactor: 重构 Gemini 适配器以支持图片编辑和生成

feat(relay): 添加图片编辑模式支持

feat(controller): 实现 UsageAPIURL 用于获取真实 token 用量

feat(web): 在渠道测试中添加模型选择功能

perf(token): 优化多模态 token 计算逻辑

fix(web): 修复日志分页组件显示问题

docs: 更新渠道配置中的 UsageAPIURL 说明

style: 清理调试日志和注释

feat(gemini): 支持 Imagen 3+ 图片生成模型

feat(openai): 添加生成 ID 捕获和元数据获取功能
2026-03-17 18:28:54 +08:00

306 lines
9.2 KiB
Go

package gemini
import (
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/gin-gonic/gin"
"github.com/songquanpeng/one-api/common/config"
"github.com/songquanpeng/one-api/common/helper"
channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
"github.com/songquanpeng/one-api/relay/adaptor/openai"
"github.com/songquanpeng/one-api/relay/meta"
"github.com/songquanpeng/one-api/relay/model"
"github.com/songquanpeng/one-api/relay/relaymode"
)
// Adaptor is the receiver type for this package's channel adaptor methods.
// It declares no fields, so the zero value is ready to use.
type Adaptor struct{}
// Init is a no-op: the body is empty and the supplied meta is not read.
func (a *Adaptor) Init(meta *meta.Meta) {}
// GetRequestURL builds the upstream endpoint URL for the request described by meta.
//
// The version segment is resolved with helper.AssignOrDefault from
// meta.Config.APIVersion, falling back to "v1beta" for gemini-1.5 / gemini-2.x /
// gemini-3.x models (matched case-insensitively) and to config.GeminiVersion
// for every other model. The image modes do not use that version; they use
// the fixed "v1" / "v1beta" segments shown below.
//
// Endpoint selection by meta.Mode:
//   - ImagesEdits:       :predict
//   - ImagesGenerations: :generateImages for isImagen3Model models, otherwise :predict
//   - Embeddings:        :batchEmbedContents
//   - anything else:     :generateContent, or :streamGenerateContent?alt=sse when meta.IsStream
//
// The returned error is always nil.
func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
	defaultVersion := config.GeminiVersion
	modelLower := strings.ToLower(meta.ActualModelName)
	if strings.Contains(modelLower, "gemini-1.5") ||
		strings.Contains(modelLower, "gemini-2.") ||
		strings.Contains(modelLower, "gemini-3.") {
		defaultVersion = "v1beta"
	}
	version := helper.AssignOrDefault(meta.Config.APIVersion, defaultVersion)
	modelName := meta.ActualModelName

	switch meta.Mode {
	case relaymode.ImagesEdits:
		// Image editing uses :predict (same as generation) with referenceImages in instances[].
		if strings.Contains(modelName, "/") {
			return fmt.Sprintf("%s/v1/%s:predict", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		return fmt.Sprintf("%s/v1beta/models/%s:predict", meta.BaseURL, modelName), nil

	case relaymode.ImagesGenerations:
		// Imagen 3+ models use the :generateImages endpoint.
		if isImagen3Model(modelName) {
			// publisherModelPath already returns the full "models/..." or
			// "publishers/.../models/..." segment, so the format string must
			// not prepend another "models/" (that produced ".../v1/models/models/<name>").
			return fmt.Sprintf("%s/v1/%s:generateImages", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		// Publisher models (e.g. klingai/kling-v2, volcengine/doubao-*) use v1 :predict.
		if strings.Contains(modelName, "/") {
			return fmt.Sprintf("%s/v1/%s:predict", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		// Legacy imagegeneration models (no publisher prefix) use v1beta :predict.
		return fmt.Sprintf("%s/v1beta/models/%s:predict", meta.BaseURL, modelName), nil

	case relaymode.Embeddings:
		return fmt.Sprintf("%s/%s/models/%s:batchEmbedContents", meta.BaseURL, version, modelName), nil
	}

	// Every other mode is a content-generation call; streaming switches to the SSE variant.
	action := "generateContent"
	if meta.IsStream {
		action = "streamGenerateContent?alt=sse"
	}
	return fmt.Sprintf("%s/%s/%s:%s", meta.BaseURL, version, publisherModelPath(modelName), action), nil
}
// SetupRequestHeader first delegates to channelhelper.SetupCommonRequestHeader
// and then sets the "x-goog-api-key" header on req to meta.APIKey.
// It always returns nil.
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error {
	const apiKeyHeader = "x-goog-api-key"

	channelhelper.SetupCommonRequestHeader(c, req, meta)
	req.Header.Set(apiKeyHeader, meta.APIKey)
	return nil
}
// ConvertRequest turns an OpenAI-style request into the upstream payload.
//
// In Embeddings mode the request goes through ConvertEmbeddingRequest; every
// other relay mode goes through ConvertRequest. The request is passed to both
// converters by value. A nil request yields an error. The gin context is not
// used here.
func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error) {
	if request == nil {
		return nil, errors.New("request is nil")
	}
	if relayMode == relaymode.Embeddings {
		embeddingPayload := ConvertEmbeddingRequest(*request)
		return embeddingPayload, nil
	}
	chatPayload := ConvertRequest(*request)
	return chatPayload, nil
}
// ConvertImageRequest maps an OpenAI-style image request onto one of three
// upstream payloads:
//
//   - request.Image set: an EditImageRequest carrying the prompt and a single
//     raw reference image (image-to-image). N and Size are not forwarded on
//     this path.
//   - isImagen3Model(request.Model): a GenerateImagesRequest.
//   - otherwise: an ImagenRequest (instances + parameters).
//
// It returns an error when request is nil or when request.Image is present
// but is not a string.
func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
	if request == nil {
		return nil, errors.New("request is nil")
	}

	if request.Image != nil {
		raw, isString := request.Image.(string)
		if !isString {
			return nil, errors.New("image field must be a base64 string or data URL")
		}

		// A data URL ("data:image/png;base64,<payload>") is reduced to its
		// payload; a string without the marker is used unchanged.
		payload := raw
		if _, rest, found := strings.Cut(raw, ";base64,"); found {
			payload = rest
		}

		reference := ReferenceImageItem{
			ReferenceType:  "REFERENCE_TYPE_RAW",
			ReferenceId:    1,
			ReferenceImage: ReferenceImageData{BytesBase64Encoded: payload},
		}
		instance := EditImageInstance{
			Prompt:          request.Prompt,
			ReferenceImages: []ReferenceImageItem{reference},
		}
		return EditImageRequest{Instances: []EditImageInstance{instance}}, nil
	}

	// Text-to-image: a non-positive image count falls back to one image.
	count := request.N
	if count <= 0 {
		count = 1
	}
	aspect := sizeToAspectRatio(request.Size)

	if isImagen3Model(request.Model) {
		return GenerateImagesRequest{
			Prompt:         request.Prompt,
			NumberOfImages: count,
			AspectRatio:    aspect,
		}, nil
	}

	return ImagenRequest{
		Instances: []ImagenInstance{{Prompt: request.Prompt}},
		Parameters: ImagenParameters{
			SampleCount: count,
			AspectRatio: aspect,
		},
	}, nil
}
// publisherModelPath turns a model name into the URL path segment used for it.
//
// Names are split at the first "/":
//   - no "/" at all            -> "models/<name>"
//   - publisher gemini/google  -> "models/<publisher>/<model>" (name kept as-is)
//   - any other publisher      -> "publishers/<publisher>/models/<model>"
func publisherModelPath(modelName string) string {
	publisher, name, hasPublisher := strings.Cut(modelName, "/")
	switch {
	case !hasPublisher, publisher == "gemini", publisher == "google":
		return "models/" + modelName
	default:
		return fmt.Sprintf("publishers/%s/models/%s", publisher, name)
	}
}
// isImagen3Model reports whether modelName, compared case-insensitively,
// starts with "imagen-3" or "imagen-4", or contains "imagegeneration@00"
// anywhere. Callers in this file use it to pick the :generateImages route.
func isImagen3Model(modelName string) bool {
	name := strings.ToLower(modelName)
	for _, prefix := range []string{"imagen-3", "imagen-4"} {
		if strings.HasPrefix(name, prefix) {
			return true
		}
	}
	return strings.Contains(name, "imagegeneration@00")
}
// sizeToAspectRatio maps a "WIDTHxHEIGHT" size string to an aspect-ratio
// string: "1792x1024" gives "16:9", "1024x1792" gives "9:16", and every other
// value (including the empty string) gives "1:1".
func sizeToAspectRatio(size string) string {
	if size == "1792x1024" {
		return "16:9"
	}
	if size == "1024x1792" {
		return "9:16"
	}
	return "1:1"
}
// DoRequest hands the request to channelhelper.DoRequestHelper, passing this
// adaptor along, and returns that helper's response and error unchanged.
func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) {
	resp, err := channelhelper.DoRequestHelper(a, c, meta, requestBody)
	return resp, err
}
// DoResponse dispatches the upstream response to the handler for the request kind.
//
// Streaming responses go to StreamHandler; its third result is stored in
// meta.GenerationId, and a non-nil usage has PromptTokens overwritten with
// meta.PromptTokens. Non-streaming responses are routed by meta.Mode:
// Embeddings to EmbeddingHandler, image generation/edits to ImagenHandler
// (which reports no usage, so usage stays nil), and everything else to Handler.
func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) {
	if meta.IsStream {
		err, usage, meta.GenerationId = StreamHandler(c, resp)
		if usage != nil {
			usage.PromptTokens = meta.PromptTokens
		}
		return usage, err
	}

	switch meta.Mode {
	case relaymode.Embeddings:
		err, usage = EmbeddingHandler(c, resp)
	case relaymode.ImagesGenerations, relaymode.ImagesEdits:
		err = ImagenHandler(c, resp)
	default:
		err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName)
	}
	return usage, err
}
// ImagenHandler converts an upstream image response into an OpenAI-style
// image response and writes it to the client with status 200.
//
// The body is read fully and decoded against three shapes in order; each
// later shape is only tried when the earlier ones produced no images:
//
//  1. GenerateImagesResponse (:generateImages) - generatedImages[].image.imageBytes
//  2. ImagenResponse (legacy :predict)         - predictions[].bytesBase64Encoded or predictions[].gcsUri
//  3. OpenAI format                            - data[].b64_json / data[].url
//
// Entries that carry neither image bytes nor a URL are skipped in every shape.
//
// Returns nil on success. Otherwise returns a wrapped error:
//   - read_response_body_failed (500) when the body cannot be read
//   - empty_response_body (502) when the body is empty, "{}" or "null"
//   - imagen_error (500) when the decoded response carries an error object
//   - no_image_data (502) when no shape yielded an image
//   - marshal_response_failed (500) when the OpenAI response cannot be encoded
func ImagenHandler(c *gin.Context, resp *http.Response) *model.ErrorWithStatusCode {
	responseBody, readErr := io.ReadAll(resp.Body)
	// The body has been consumed (or failed); a Close error is not actionable here.
	_ = resp.Body.Close()
	if readErr != nil {
		return openai.ErrorWrapper(readErr, "read_response_body_failed", http.StatusInternalServerError)
	}

	// NOTE: the raw body is intentionally not logged - it contains the
	// base64-encoded images and can be many megabytes per request.

	// Safety check: empty response.
	if len(responseBody) == 0 || string(responseBody) == "{}" || string(responseBody) == "null" {
		return openai.ErrorWrapper(
			fmt.Errorf("upstream returned empty response body"),
			"empty_response_body",
			http.StatusBadGateway,
		)
	}

	data := make([]openai.ImageData, 0)

	// Format 1: :generateImages -> generatedImages[].image.imageBytes
	var generateResp GenerateImagesResponse
	if err := json.Unmarshal(responseBody, &generateResp); err == nil {
		if generateResp.Error != nil {
			return openai.ErrorWrapper(
				fmt.Errorf("%s", generateResp.Error.Message),
				"imagen_error",
				http.StatusInternalServerError,
			)
		}
		for _, img := range generateResp.GeneratedImages {
			if img.Image.ImageBytes != "" {
				data = append(data, openai.ImageData{B64Json: img.Image.ImageBytes})
			}
		}
	}

	// Format 2: legacy :predict -> predictions[].bytesBase64Encoded or predictions[].gcsUri
	if len(data) == 0 {
		var imagenResp ImagenResponse
		if err := json.Unmarshal(responseBody, &imagenResp); err == nil {
			if imagenResp.Error != nil {
				return openai.ErrorWrapper(
					fmt.Errorf("%s", imagenResp.Error.Message),
					"imagen_error",
					http.StatusInternalServerError,
				)
			}
			for _, p := range imagenResp.Predictions {
				if p.BytesBase64Encoded != "" {
					data = append(data, openai.ImageData{B64Json: p.BytesBase64Encoded})
				} else if p.GcsUri != "" {
					data = append(data, openai.ImageData{Url: p.GcsUri})
				}
			}
		}
	}

	// Format 3: OpenAI format data[].b64_json - used by models like openai/gpt-image-*
	if len(data) == 0 {
		var openaiImgResp struct {
			Data []struct {
				B64Json string `json:"b64_json"`
				URL     string `json:"url"`
			} `json:"data"`
		}
		if err := json.Unmarshal(responseBody, &openaiImgResp); err == nil {
			for _, item := range openaiImgResp.Data {
				// Skip placeholders with no content so that an all-empty
				// data[] still reaches the no_image_data check below instead
				// of being returned to the client as blank images.
				if item.B64Json == "" && item.URL == "" {
					continue
				}
				data = append(data, openai.ImageData{B64Json: item.B64Json, Url: item.URL})
			}
		}
	}

	if len(data) == 0 {
		return openai.ErrorWrapper(
			fmt.Errorf("upstream returned no image data (response: %s)", string(responseBody)),
			"no_image_data",
			http.StatusBadGateway,
		)
	}

	openaiResp := openai.ImageResponse{
		Created: time.Now().Unix(),
		Data:    data,
	}
	jsonBytes, err := json.Marshal(openaiResp)
	if err != nil {
		return openai.ErrorWrapper(err, "marshal_response_failed", http.StatusInternalServerError)
	}

	c.Writer.Header().Set("Content-Type", "application/json")
	c.Writer.WriteHeader(http.StatusOK)
	// Best effort: the status line is already sent, so a write failure
	// (typically a disconnected client) can no longer be reported to the caller.
	_, _ = c.Writer.Write(jsonBytes)
	return nil
}
// GetModelList returns the package-level ModelList slice as-is (no copy is made).
func (a *Adaptor) GetModelList() []string {
	models := ModelList
	return models
}
// GetChannelName returns the fixed channel name "google gemini".
func (a *Adaptor) GetChannelName() string {
	const channelName = "google gemini"
	return channelName
}