package gemini

import (
	"errors"
	"fmt"
	"net/http"
	"strings"

	"github.com/gin-gonic/gin"

	"github.com/songquanpeng/one-api/common/config"
	"github.com/songquanpeng/one-api/common/helper"
	channelhelper "github.com/songquanpeng/one-api/relay/adaptor"
	"github.com/songquanpeng/one-api/relay/meta"
	"github.com/songquanpeng/one-api/relay/model"
	"github.com/songquanpeng/one-api/relay/relaymode"
)

// Adaptor implements the relay channel adaptor interface for Google Gemini / Imagen.
type Adaptor struct {
}

// Init is a no-op; this adaptor keeps no per-request state.
func (a *Adaptor) Init(meta *meta.Meta) {
}

// GetRequestURL builds the upstream URL for the request described by meta.
// Image generation/editing modes pick between Vertex-style :predict /
// :generateImages endpoints depending on the model family; everything else
// goes to :generateContent (or its SSE streaming variant).
func (a *Adaptor) GetRequestURL(meta *meta.Meta) (string, error) {
	defaultVersion := config.GeminiVersion
	// Newer Gemini model families are only served under the v1beta API.
	modelLower := strings.ToLower(meta.ActualModelName)
	if strings.Contains(modelLower, "gemini-1.5") ||
		strings.Contains(modelLower, "gemini-2.") ||
		strings.Contains(modelLower, "gemini-3.") {
		defaultVersion = "v1beta"
	}
	version := helper.AssignOrDefault(meta.Config.APIVersion, defaultVersion)
	modelName := meta.ActualModelName
	switch meta.Mode {
	case relaymode.ImagesEdits:
		// Image editing uses :predict (same as generation) with referenceImages in instances[]
		if strings.Contains(modelName, "/") {
			return fmt.Sprintf("%s/v1/%s:predict", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		return fmt.Sprintf("%s/v1beta/models/%s:predict", meta.BaseURL, modelName), nil
	case relaymode.ImagesGenerations:
		// Imagen 3+ models use the :generateImages endpoint.
		// publisherModelPath already yields "models/<name>" for slash-free
		// names, so the format string must not prepend another "models/"
		// (doing so produced a broken ".../v1/models/models/<name>" path).
		if isImagen3Model(modelName) {
			return fmt.Sprintf("%s/v1/%s:generateImages", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		// Publisher models (e.g. klingai/kling-v2, volcengine/doubao-*) use v1 :predict
		if strings.Contains(modelName, "/") {
			return fmt.Sprintf("%s/v1/%s:predict", meta.BaseURL, publisherModelPath(modelName)), nil
		}
		// Legacy imagegeneration models (no publisher prefix) use v1beta :predict
		return fmt.Sprintf("%s/v1beta/models/%s:predict", meta.BaseURL, modelName), nil
	case relaymode.Embeddings:
		return fmt.Sprintf("%s/%s/models/%s:batchEmbedContents", meta.BaseURL, version, modelName), nil
	}
	action := "generateContent"
	if meta.IsStream {
		action = "streamGenerateContent?alt=sse"
	}
	return fmt.Sprintf("%s/%s/%s:%s", meta.BaseURL, version, publisherModelPath(modelName), action), nil
}

// SetupRequestHeader sets the common relay headers plus the Google API key header.
func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Request, meta *meta.Meta) error {
	channelhelper.SetupCommonRequestHeader(c, req, meta)
	req.Header.Set("x-goog-api-key", meta.APIKey)
	return nil
}

// ConvertRequest translates an OpenAI-style request into the Gemini request
// shape appropriate for the relay mode (embeddings vs. chat/completions).
func (a *Adaptor) ConvertRequest(c *gin.Context, relayMode int, request *model.GeneralOpenAIRequest) (any, error) {
	if request == nil {
		return nil, errors.New("request is nil")
	}
	switch relayMode {
	case relaymode.Embeddings:
		geminiEmbeddingRequest := ConvertEmbeddingRequest(*request)
		return geminiEmbeddingRequest, nil
	default:
		geminiRequest := ConvertRequest(*request)
		return geminiRequest, nil
	}
}

// ConvertImageRequest translates an OpenAI image request into one of three
// Gemini/Imagen request shapes:
//   - EditImageRequest when a source image is supplied (image-to-image),
//   - GenerateImagesRequest for Imagen 3+ models,
//   - legacy ImagenRequest (:predict) otherwise.
func (a *Adaptor) ConvertImageRequest(request *model.ImageRequest) (any, error) {
	if request == nil {
		return nil, errors.New("request is nil")
	}
	n := request.N
	if n <= 0 {
		n = 1
	}
	// If an image is provided, convert to :editImage format (image-to-image)
	if request.Image != nil {
		imageStr, ok := request.Image.(string)
		if !ok {
			return nil, errors.New("image field must be a base64 string or data URL")
		}
		// Strip data URL prefix if present (e.g. "data:image/png;base64,")
		base64Data := imageStr
		if _, after, found := strings.Cut(imageStr, ";base64,"); found {
			base64Data = after
		}
		return EditImageRequest{
			Instances: []EditImageInstance{
				{
					Prompt: request.Prompt,
					ReferenceImages: []ReferenceImageItem{
						{
							ReferenceType: "REFERENCE_TYPE_RAW",
							ReferenceId:   1,
							ReferenceImage: ReferenceImageData{
								BytesBase64Encoded: base64Data,
							},
						},
					},
				},
			},
		}, nil
	}
	if isImagen3Model(request.Model) {
		return GenerateImagesRequest{
			Prompt:         request.Prompt,
			NumberOfImages: n,
			AspectRatio:    sizeToAspectRatio(request.Size),
		}, nil
	}
	return ImagenRequest{
		Instances: []ImagenInstance{{Prompt: request.Prompt}},
		Parameters: ImagenParameters{
			SampleCount: n,
			AspectRatio: sizeToAspectRatio(request.Size),
		},
	}, nil
}

// publisherModelPath converts publisher/model names to the correct Vertex AI URL path segment.
// Google's own models (gemini/, google/) keep the raw /models/publisher/model format.
// Third-party publisher models use /publishers/{pub}/models/{model} format.
func publisherModelPath(modelName string) string {
	// Note: the second return is deliberately NOT named "model" — that would
	// shadow the imported relay/model package.
	publisher, rest, found := strings.Cut(modelName, "/")
	if !found || publisher == "gemini" || publisher == "google" {
		return fmt.Sprintf("models/%s", modelName)
	}
	return fmt.Sprintf("publishers/%s/models/%s", publisher, rest)
}

// isImagen3Model returns true for image-generation-only models that use the :generateImages endpoint.
func isImagen3Model(modelName string) bool { lower := strings.ToLower(modelName) return strings.HasPrefix(lower, "imagen-3") || strings.HasPrefix(lower, "imagen-4") || strings.Contains(lower, "imagegeneration@00") } func sizeToAspectRatio(size string) string { switch size { case "1792x1024": return "16:9" case "1024x1792": return "9:16" case "1024x1024", "": return "1:1" default: return "1:1" } } func (a *Adaptor) DoRequest(c *gin.Context, meta *meta.Meta, requestBody io.Reader) (*http.Response, error) { return channelhelper.DoRequestHelper(a, c, meta, requestBody) } func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, meta *meta.Meta) (usage *model.Usage, err *model.ErrorWithStatusCode) { if meta.IsStream { err, usage, meta.GenerationId = StreamHandler(c, resp) if usage != nil { usage.PromptTokens = meta.PromptTokens } } else { switch meta.Mode { case relaymode.Embeddings: err, usage = EmbeddingHandler(c, resp) case relaymode.ImagesGenerations, relaymode.ImagesEdits: err = ImagenHandler(c, resp) default: err, usage = Handler(c, resp, meta.PromptTokens, meta.ActualModelName) } } return } // ImagenHandler converts a Google Imagen predict/generateImages response to OpenAI image response format. // Handles both Vertex AI format (predictions[].bytesBase64Encoded) and OpenAI format (data[].b64_json). 
func ImagenHandler(c *gin.Context, resp *http.Response) *model.ErrorWithStatusCode { responseBody, readErr := io.ReadAll(resp.Body) resp.Body.Close() if readErr != nil { return openai.ErrorWrapper(readErr, "read_response_body_failed", http.StatusInternalServerError) } fmt.Printf("[ImagenHandler] raw response body: %s\n", string(responseBody)) // Safety check: empty response if len(responseBody) == 0 || string(responseBody) == "{}" || string(responseBody) == "null" { return openai.ErrorWrapper( fmt.Errorf("upstream returned empty response body"), "empty_response_body", http.StatusBadGateway, ) } data := make([]openai.ImageData, 0) // Format 1: :generateImages / :editImage → generatedImages[].image.imageBytes var generateResp GenerateImagesResponse if err := json.Unmarshal(responseBody, &generateResp); err == nil { if generateResp.Error != nil { return openai.ErrorWrapper( fmt.Errorf("%s", generateResp.Error.Message), "imagen_error", http.StatusInternalServerError, ) } for _, img := range generateResp.GeneratedImages { if img.Image.ImageBytes != "" { data = append(data, openai.ImageData{B64Json: img.Image.ImageBytes}) } } } // Format 2: legacy :predict → predictions[].bytesBase64Encoded or predictions[].gcsUri if len(data) == 0 { var imagenResp ImagenResponse if err := json.Unmarshal(responseBody, &imagenResp); err == nil { if imagenResp.Error != nil { return openai.ErrorWrapper( fmt.Errorf("%s", imagenResp.Error.Message), "imagen_error", http.StatusInternalServerError, ) } for _, p := range imagenResp.Predictions { if p.BytesBase64Encoded != "" { data = append(data, openai.ImageData{B64Json: p.BytesBase64Encoded}) } else if p.GcsUri != "" { data = append(data, openai.ImageData{Url: p.GcsUri}) } } } } // Format 3: OpenAI format data[].b64_json — used by models like openai/gpt-image-* if len(data) == 0 { var openaiImgResp struct { Data []struct { B64Json string `json:"b64_json"` URL string `json:"url"` } `json:"data"` } if err := json.Unmarshal(responseBody, 
&openaiImgResp); err == nil { for _, item := range openaiImgResp.Data { data = append(data, openai.ImageData{B64Json: item.B64Json, Url: item.URL}) } } } if len(data) == 0 { return openai.ErrorWrapper( fmt.Errorf("upstream returned no image data (response: %s)", string(responseBody)), "no_image_data", http.StatusBadGateway, ) } openaiResp := openai.ImageResponse{ Created: time.Now().Unix(), Data: data, } jsonBytes, err := json.Marshal(openaiResp) if err != nil { return openai.ErrorWrapper(err, "marshal_response_failed", http.StatusInternalServerError) } c.Writer.Header().Set("Content-Type", "application/json") c.Writer.WriteHeader(http.StatusOK) c.Writer.Write(jsonBytes) return nil } func (a *Adaptor) GetModelList() []string { return ModelList } func (a *Adaptor) GetChannelName() string { return "google gemini" }