LLM 호출 90% 실수 #shorts #SAP #Joule

SAP BTP에서 LLM을 호출할 때 개발자 90%가 반복하는 실수

SAP BTP AI Core와 SAP Generative AI Hub를 통해 LLM(Large Language Model)을 호출하는 패턴은 생각보다 단순합니다. 그러나 실제 프로젝트에서 동일한 실수가 반복됩니다. 프롬프트 설계, 응답 처리, 에러 핸들링 세 영역에서 가장 자주 발생하는 패턴을 코드와 함께 분석합니다.

실수 1: 시스템 프롬프트 없이 사용자 입력만 전달

# 잘못된 방식: 사용자 입력 그대로 전달
import requests

def call_llm_wrong(user_question: str, token: str) -> str:
    """Anti-pattern example: forward the raw user question with no system prompt.

    This function is intentionally left flawed for illustration. It sends a
    single user message to a fixed chat-completions URL and returns the text
    of the first choice without any validation.

    Args:
        user_question: Text sent verbatim as the only (user) message.
        token: Bearer token placed in the Authorization header.

    Returns:
        The ``content`` of the first choice's message in the JSON response.
    """
    # No system message is included, so the model receives no role or constraints.
    body = {
        "messages": [{"role": "user", "content": user_question}],
        "model": "gpt-4o",
    }
    # Not an f-string: the "{id}" segment is sent literally as written.
    endpoint = "https://api.ai.internalprod.eu-central-1.aws.ml.hana.ondemand.com/v2/inference/deployments/{id}/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    reply = requests.post(endpoint, headers=request_headers, json=body)
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]

# 올바른 방식: 시스템 프롬프트로 역할과 제약 설정
def call_llm_correct(user_question: str, token: str, deployment_url: str) -> str:
    """Ask the model a question with a system prompt that fixes its role and limits.

    Args:
        user_question: Text sent as the user message.
        token: Bearer token placed in the Authorization header.
        deployment_url: Base URL of the deployment; "/chat/completions" is appended.

    Returns:
        The ``content`` of the first choice's message in the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``raise_for_status``).
    """
    # The system message defines the assistant's role and constraints.
    system_prompt = (
        "당신은 SAP BTP 전문가입니다. "
        "사용자의 SAP 기술 질문에 간결하고 정확하게 답변하세요. "
        "SAP 공식 문서 기반으로만 답변하고, "
        "모르는 내용은 모른다고 명확히 밝히세요."
    )
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question},
    ]
    body = {
        "messages": conversation,
        "model": "gpt-4o",
        "max_tokens": 1024,
        "temperature": 0.3,
    }
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    reply = requests.post(
        f"{deployment_url}/chat/completions",
        headers=request_headers,
        json=body,
        timeout=30,
    )
    reply.raise_for_status()
    first_choice = reply.json()["choices"][0]
    return first_choice["message"]["content"]

시스템 프롬프트는 LLM의 행동 방식을 정의합니다. 시스템 프롬프트가 없으면 응답의 일관성이 떨어지고, 프롬프트 인젝션 같은 보안 위협에 더 취약해집니다. 다만 시스템 프롬프트만으로 인젝션을 완전히 막을 수는 없으므로 입력 검증과 출력 필터링을 함께 적용해야 합니다.

실수 2: 응답 구조를 검증하지 않음

# Wrong approach: index straight into the response without validating its structure.
content = response.json()["choices"][0]["message"]["content"]
# If the response is an error payload or choices is an empty list, this raises KeyError/IndexError.

# 올바른 방식: 방어적 응답 처리
def extract_content(response_data: dict) -> str:
    """Return the stripped text of the first choice in a chat-completion response.

    Every malformed or unusable payload is reported as ``ValueError`` so that
    callers have a single exception type to handle.

    Args:
        response_data: Decoded JSON body of a chat-completions call.

    Returns:
        The first choice's message content with surrounding whitespace removed.

    Raises:
        ValueError: If the payload carries a non-null "error" entry, has no
            choices, or the first choice has no non-blank text content.
    """
    error = response_data.get("error")
    if error is not None:
        # The error entry may be an object or a bare string; calling .get() on a
        # string would raise AttributeError instead of the intended ValueError.
        if isinstance(error, dict):
            error_msg = error.get("message", "알 수 없는 오류")
        else:
            error_msg = str(error) or "알 수 없는 오류"
        raise ValueError(f"LLM API 오류: {error_msg}")

    # "choices" may be missing or explicitly null; treat both as empty.
    choices = response_data.get("choices") or []
    if not choices:
        raise ValueError("LLM 응답에 choices가 없습니다.")

    message = choices[0].get("message") or {}
    # "content" can be present with a null value, in which case dict.get()
    # returns None rather than the default, so coalesce before strip().
    content = (message.get("content") or "").strip()

    if not content:
        raise ValueError("LLM이 빈 응답을 반환했습니다.")

    return content

실수 3: 응답이 토큰 한도로 잘렸는지(finish_reason) 확인하지 않음

def call_with_token_check(prompt: str, token: str, deployment_url: str) -> dict:
    """Call the chat-completions endpoint and report whether the answer was cut off.

    Args:
        prompt: Text sent as the single user message.
        token: Bearer token placed in the Authorization header.
        deployment_url: Base URL of the deployment; "/chat/completions" is appended.

    Returns:
        A dict with "content" (the first choice's message content) and
        "truncated" (True when finish_reason is "length"). When truncated, a
        "warning" entry with a user-facing hint is included as well.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the response body contains no choices.
    """
    payload = {
        "messages": [{"role": "user", "content": prompt}],
        "model": "gpt-4o",
        "max_tokens": 2048
    }
    response = requests.post(
        f"{deployment_url}/chat/completions",
        headers={"Authorization": f"Bearer {token}"},
        json=payload,
        timeout=60
    )
    # Surface HTTP failures as HTTPError instead of a confusing KeyError on an
    # error body; this is also what lets status-based retry logic see a 429.
    response.raise_for_status()
    data = response.json()

    choices = data.get("choices") or []
    if not choices:
        raise ValueError("LLM 응답에 choices가 없습니다.")

    choice = choices[0]
    # finish_reason == "length" means generation stopped at the max_tokens limit.
    truncated = choice.get("finish_reason", "") == "length"

    result = {
        "content": choice["message"]["content"],
        "truncated": truncated
    }
    if truncated:
        result["warning"] = "응답이 max_tokens 한도로 잘렸습니다. max_tokens를 늘리거나 입력을 줄이세요."
    return result

SAP AI Core Deployment URL 관리

# BTP 서비스 바인딩에서 credentials 읽기
import json, os

def get_aicore_config() -> dict:
    """Read the AI Core connection settings from the VCAP_SERVICES variable.

    Returns:
        A dict with "token_url", "client_id", "client_secret" and "api_base",
        taken from the credentials of the first "aicore" binding.

    Raises:
        EnvironmentError: If VCAP_SERVICES has no "aicore" entry (or it is empty).
    """
    # An unset variable is treated as an empty JSON object.
    raw_bindings = os.environ.get("VCAP_SERVICES", "{}")
    bindings = json.loads(raw_bindings).get("aicore", [])
    if not bindings:
        raise EnvironmentError("AI Core 서비스가 바인딩되지 않았습니다.")

    # Only the first binding is used.
    credentials = bindings[0]["credentials"]
    token_endpoint = credentials["url"] + "/oauth/token"
    config = {"token_url": token_endpoint}
    config["client_id"] = credentials["clientid"]
    config["client_secret"] = credentials["clientsecret"]
    config["api_base"] = credentials["serviceurls"]["AI_API_URL"]
    return config

Retry 로직으로 429 Rate Limit 처리

import time, functools

def with_retry(max_retries=3, backoff_base=2):
    """Build a decorator that retries a call when it fails with HTTP 429.

    The wrapped function is called up to ``max_retries`` times. After a 429
    failure the wrapper waits ``backoff_base ** attempt`` seconds (attempt
    starts at 0) before the next try. Any other ``requests.HTTPError`` is
    re-raised immediately.

    Args:
        max_retries: Maximum number of calls to the wrapped function.
        backoff_base: Base of the exponential wait, in seconds.

    Returns:
        A decorator that adds the retry behaviour to a function.

    Raises:
        RuntimeError: From the wrapper, when every attempt ended in a 429
            (the last HTTPError is attached as the cause).
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except requests.HTTPError as e:
                    # Only rate-limit responses are retried; an HTTPError
                    # without a response object cannot be classified.
                    if e.response is None or e.response.status_code != 429:
                        raise
                    last_error = e
                    # Waiting after the final attempt would only delay the
                    # failure, so back off only when another try follows.
                    if attempt + 1 < max_retries:
                        wait = backoff_base ** attempt
                        print(f"Rate limit 초과. {wait}초 후 재시도 ({attempt+1}/{max_retries})")
                        time.sleep(wait)
            raise RuntimeError(f"{max_retries}회 재시도 실패") from last_error
        return wrapper
    return decorator

@with_retry(max_retries=3, backoff_base=2)
def call_llm_safe(prompt: str, token: str, deployment_url: str) -> str:
    """Placeholder for an LLM call wrapped by ``with_retry`` (3 attempts, backoff base 2).

    The body is a stub in this example and currently returns None.
    """
    # ... LLM call logic goes here
    pass

체크리스트: LLM 호출 전 확인사항

시스템 프롬프트에 역할, 제약, 출력 형식이 정의되어 있는가
max_tokens와 temperature가 용도에 맞게 설정되어 있는가
응답의 finish_reason을 확인하는가
429 Rate Limit에 대한 Retry 로직이 있는가
API 토큰이 소스 코드에 하드코딩되어 있지 않은가

공식 문서

SAP AI Core와 Generative AI Hub 사용 가이드는 SAP AI Core 공식 문서에서 확인하세요.

댓글 0