บทนำ: ทำไมต้องคำนึงถึง AI API Integration ใน Microservice

ในยุคที่ AI กลายเป็นหัวใจสำคัญของแอปพลิเคชันบน modern stack การออกแบบระบบ microservice ที่รองรับการทำงานร่วมกับ AI API อย่างมีประสิทธิภาพไม่ใช่ทางเลือกอีกต่อไป แต่เป็นความจำเป็น บทความนี้จะพาคุณไปสำรวจ best practices ในการ integrate AI API เข้ากับ microservice architecture ตั้งแต่พื้นฐานจนถึง advanced patterns ที่ใช้งานจริงใน production การใช้ HolySheep AI ซึ่งมีอัตราแลกเปลี่ยน ¥1=$1 ช่วยให้ประหยัดค่าใช้จ่ายได้ถึง 85% พร้อม latency ต่ำกว่า 50ms และรองรับการชำระเงินผ่าน WeChat/Alipay

1. พื้นฐาน API Client Pattern

เริ่มต้นด้วยการสร้าง abstraction layer สำหรับ AI API เพื่อให้ง่ายต่อการ maintain และ swap provider ในอนาคต
// ai-client.service.ts - Abstraction Layer for AI APIs
import axios, { AxiosInstance, AxiosRequestConfig } from 'axios';

/**
 * Shape of the JSON body this client expects back from a non-streaming
 * `/chat/completions` call.
 */
interface AIResponse {
  /** Identifier of the completion. */
  id: string;
  /** Model name reported in the response. */
  model: string;
  /** Generated candidates; callers in this file read the first entry. */
  choices: {
    message: { role: string; content: string };
    finish_reason: string;
  }[];
  /** Token accounting for the request. */
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

/** Request payload posted to `/chat/completions`. */
interface AIRequest {
  /** Name of the model to run. */
  model: string;
  /** Conversation so far, in order. */
  messages: { role: string; content: string }[];
  /** Optional sampling temperature. */
  temperature?: number;
  /** Optional cap on generated tokens. */
  max_tokens?: number;
}

/**
 * Thin HTTP client for the HolySheep chat-completions API.
 *
 * Wraps an axios instance that:
 *  - sends the bearer token and JSON content type on every request,
 *  - logs each request and its round-trip duration,
 *  - retries a request once after an HTTP 429, honouring `Retry-After`.
 */
class HolySheepAIClient {
  private client: AxiosInstance;
  private apiKey: string;
  private readonly baseURL = 'https://api.holysheep.ai/v1';

  /**
   * @param apiKey Secret sent as `Authorization: Bearer <apiKey>`.
   */
  constructor(apiKey: string) {
    this.apiKey = apiKey;
    this.client = axios.create({
      baseURL: this.baseURL,
      timeout: 30000,
      headers: {
        'Authorization': `Bearer ${this.apiKey}`,
        'Content-Type': 'application/json',
      },
    });

    // Request interceptor for logging; stamps the start time for the
    // response interceptor to compute the duration.
    this.client.interceptors.request.use((config) => {
      console.log(`[AI Client] ${config.method?.toUpperCase()} ${config.url}`);
      const startTime = Date.now();
      (config as any).metadata = { startTime };
      return config;
    });

    // Response interceptor for metrics and the single 429 retry.
    this.client.interceptors.response.use(
      (response) => {
        // Guard against a config that never passed through the request
        // interceptor (no metadata) instead of throwing on a log line.
        const startTime = (response.config as any)?.metadata?.startTime;
        if (typeof startTime === 'number') {
          console.log(`[AI Client] Response in ${Date.now() - startTime}ms`);
        }
        return response;
      },
      async (error) => {
        const originalRequest = error?.config;
        // `config` is absent on errors raised before a request was built.
        if (
          originalRequest &&
          error.response?.status === 429 &&
          !originalRequest._retry
        ) {
          originalRequest._retry = true;
          const retryAfter = error.response.headers?.['retry-after'];
          // Retry-After may be delta-seconds or an HTTP-date; only the
          // numeric form is understood here. Anything else (including a
          // date, which parses to NaN) falls back to 5 s rather than
          // retrying immediately.
          const seconds = Number.parseInt(String(retryAfter ?? ''), 10);
          const delay =
            Number.isFinite(seconds) && seconds >= 0 ? seconds * 1000 : 5000;
          await new Promise((resolve) => setTimeout(resolve, delay));
          return this.client(originalRequest);
        }
        return Promise.reject(error);
      }
    );
  }

  /**
   * Sends a non-streaming chat completion request.
   *
   * @param request Model, messages and optional sampling settings.
   * @returns The parsed response body.
   * @throws Whatever the underlying HTTP client rejects with.
   */
  async chat(request: AIRequest): Promise<AIResponse> {
    const response = await this.client.post<AIResponse>('/chat/completions', request);
    return response.data;
  }

  /**
   * Sends a streaming chat completion request and exposes the generated text
   * as an async iterator of content fragments.
   *
   * The response body is read as server-sent events: every `data: ` line is
   * parsed as JSON and `choices[0].delta.content` is yielded when non-empty.
   * Iteration ends at `data: [DONE]` or when the stream closes. Lines that are
   * not valid JSON are skipped.
   *
   * @param request Same payload as {@link chat}; `stream: true` is added.
   * @returns An async iterator over content fragments, in arrival order.
   */
  async chatStream(request: AIRequest): Promise<AsyncIterableIterator<string>> {
    const response = await this.client.post(
      '/chat/completions',
      { ...request, stream: true },
      { responseType: 'stream' }
    );

    // Interprets one line of the event stream.
    const parseLine = (rawLine: string): { done: boolean; content?: string } => {
      // Event streams may use CRLF line endings; drop the stray '\r'.
      const line = rawLine.endsWith('\r') ? rawLine.slice(0, -1) : rawLine;
      if (!line.startsWith('data: ')) return { done: false };
      const data = line.slice(6);
      if (data === '[DONE]') return { done: true };
      try {
        const parsed = JSON.parse(data);
        return { done: false, content: parsed.choices?.[0]?.delta?.content };
      } catch {
        // Skip malformed JSON
        return { done: false };
      }
    };

    return (async function* () {
      const decoder = new TextDecoder();
      let buffer = '';

      for await (const chunk of response.data) {
        buffer += decoder.decode(chunk, { stream: true });
        const lines = buffer.split('\n');
        // The last element is an incomplete line; keep it for the next chunk.
        buffer = lines.pop() ?? '';

        for (const line of lines) {
          const event = parseLine(line);
          if (event.done) return;
          if (event.content) yield event.content;
        }
      }

      // Flush bytes held back by the decoder and handle a final line that was
      // not terminated by a newline, so the last fragment is not lost.
      buffer += decoder.decode();
      const last = parseLine(buffer);
      if (!last.done && last.content) yield last.content;
    })();
  }
}

export { HolySheepAIClient, AIRequest, AIResponse };

2. Circuit Breaker Pattern สำหรับ AI Service

AI API เป็น external dependency ที่อาจ fail ได้เสมอ การใช้ Circuit Breaker ช่วยป้องกัน cascade failure และ provide fallback mechanism
// circuit-breaker.ts - Circuit Breaker Implementation
/** The three states a circuit breaker can be in. */
type CircuitState =
  | 'CLOSED'
  | 'OPEN'
  | 'HALF_OPEN';

/** Tuning knobs for the circuit breaker. */
interface CircuitBreakerConfig {
  /** Failure count at which the breaker opens; the count resets on any success. */
  failureThreshold: number;
  /** Successes required while HALF_OPEN before the breaker closes again. */
  successThreshold: number;
  /**
   * Milliseconds. Used both as the per-call time limit and as how long the
   * breaker stays OPEN before trial requests are allowed.
   */
  timeout: number;
  /** Limit on trial requests the breaker admits while HALF_OPEN. */
  halfOpenRequests: number;
}

/**
 * Circuit breaker guarding calls to an unreliable dependency.
 *
 * - CLOSED: calls pass through; `failureThreshold` failures without an
 *   intervening success open the breaker.
 * - OPEN: calls are rejected immediately until `timeout` ms have elapsed.
 * - HALF_OPEN: up to `halfOpenRequests` trial calls may be in flight at once;
 *   `successThreshold` successes close the breaker, any failure re-opens it.
 */
class CircuitBreaker {
  private state: CircuitState = 'CLOSED';
  private failureCount = 0;
  private successCount = 0;
  private nextAttempt: number = Date.now();
  /** Number of trial calls currently in flight while HALF_OPEN. */
  private halfOpenRequests = 0;

  constructor(private config: CircuitBreakerConfig) {}

  /**
   * Runs `fn` under the breaker, racing it against `config.timeout` ms.
   *
   * @param fn The operation to protect.
   * @returns Whatever `fn` resolves with.
   * @throws Error('Circuit breaker is OPEN - service unavailable') while open.
   * @throws Error('Circuit breaker is HALF_OPEN - max requests reached') when
   *   the trial slots are all taken.
   * @throws Error('Timeout') when `fn` does not settle in time, or whatever
   *   `fn` itself rejects with.
   */
  async execute<T>(fn: () => Promise<T>): Promise<T> {
    if (this.state === 'OPEN') {
      if (Date.now() < this.nextAttempt) {
        throw new Error('Circuit breaker is OPEN - service unavailable');
      }
      this.state = 'HALF_OPEN';
      this.halfOpenRequests = 0;
      this.successCount = 0;
    }

    const isTrial = this.state === 'HALF_OPEN';
    if (isTrial) {
      if (this.halfOpenRequests >= this.config.halfOpenRequests) {
        throw new Error('Circuit breaker is HALF_OPEN - max requests reached');
      }
      this.halfOpenRequests++;
    }

    let timer: ReturnType<typeof setTimeout> | undefined;
    try {
      const work = fn();
      const deadline = new Promise<never>((_, reject) => {
        timer = setTimeout(() => reject(new Error('Timeout')), this.config.timeout);
      });
      const result = await Promise.race([work, deadline]);
      this.onSuccess();
      return result;
    } catch (error) {
      this.onFailure();
      throw error;
    } finally {
      // Without this the timer outlives every call by `timeout` ms and keeps
      // the event loop alive.
      if (timer !== undefined) clearTimeout(timer);
      // Release the trial slot. If slots were only ever consumed, a config
      // with halfOpenRequests < successThreshold could never leave HALF_OPEN.
      if (isTrial && this.halfOpenRequests > 0) this.halfOpenRequests--;
    }
  }

  /** Records a success; closes the breaker once enough trial calls succeed. */
  private onSuccess(): void {
    this.failureCount = 0;
    if (this.state === 'HALF_OPEN') {
      this.successCount++;
      if (this.successCount >= this.config.successThreshold) {
        this.state = 'CLOSED';
        this.successCount = 0;
      }
    }
  }

  /** Records a failure; opens the breaker at the threshold or on any trial failure. */
  private onFailure(): void {
    this.failureCount++;
    this.successCount = 0;
    if (
      this.failureCount >= this.config.failureThreshold ||
      this.state === 'HALF_OPEN'
    ) {
      this.state = 'OPEN';
      this.nextAttempt = Date.now() + this.config.timeout;
    }
  }

  /** @returns The breaker's current state. */
  getState(): CircuitState {
    return this.state;
  }
}

// AI Service with Circuit Breaker
/**
 * Chat facade that routes every AI call through a circuit breaker and can
 * answer with a canned message when the call fails.
 */
class AIServiceWithCircuitBreaker {
  private aiClient: HolySheepAIClient;
  private circuitBreaker: CircuitBreaker;

  /**
   * @param apiKey Key handed to the underlying AI client.
   */
  constructor(apiKey: string) {
    const breakerSettings = {
      failureThreshold: 5,
      successThreshold: 3,
      timeout: 30000,
      halfOpenRequests: 3,
    };
    this.aiClient = new HolySheepAIClient(apiKey);
    this.circuitBreaker = new CircuitBreaker(breakerSettings);
  }

  /**
   * Asks the model for a reply to `messages`, guarded by the breaker.
   *
   * @returns The text of the first choice.
   * @throws When the breaker rejects the call or the request fails.
   */
  async chat(messages: Array<{ role: string; content: string }>): Promise<string> {
    const askModel = async (): Promise<string> => {
      const { choices } = await this.aiClient.chat({
        model: 'gpt-4.1',
        messages,
        temperature: 0.7,
        max_tokens: 2000,
      });
      return choices[0].message.content;
    };
    return this.circuitBreaker.execute(askModel);
  }

  /**
   * Same as {@link chat}, but never rejects: any failure is logged as a
   * warning and replaced by a fallback message.
   */
  async chatWithFallback(
    messages: Array<{ role: string; content: string }>
  ): Promise<string> {
    return this.chat(messages).catch(() => {
      console.warn('AI service unavailable, using fallback');
      return this.getFallbackResponse(messages);
    });
  }

  /** Picks a canned reply based on the content of the last message. */
  private getFallbackResponse(messages: Array<{ role: string; content: string }>): string {
    const latest = messages[messages.length - 1];
    const text = latest?.content || '';
    return text.includes('สถานะ')
      ? 'ขออภัย ระบบไม่สามารถตอบคำถามได้ในขณะนี้ กรุณาลองใหม่ภายหลัง'
      : 'ขออภัย บริการ AI ขัดข้องชั่วคราว ทีมงานกำลังดำเนินการแก้ไข';
  }
}

export { CircuitBreaker, AIServiceWithCircuitBreaker };

3. Concurrency Control และ Rate Limiting

การจัดการ concurrent requests อย่างเหมาะสมช่วยป้องกัน quota exhaustion และ maintain SLA
// concurrency-controller.ts - Advanced Concurrency Management
import { RateLimiter } from 'limiter';
import PQueue from 'p-queue';

/** Mutable state of a token bucket. */
interface TokenBucket {
  /** Tokens currently available; may be fractional between refills. */
  tokens: number;
  /** Upper bound that refilling never exceeds. */
  maxTokens: number;
  /** Tokens added per second of elapsed time. */
  refillRate: number;
  /** `Date.now()` timestamp (ms) of the most recent refill. */
  lastRefill: number;
}

/**
 * Combines a token bucket (requests per minute) with a bounded-concurrency
 * task queue.
 *
 * The two mechanisms are independent: `acquire()` consumes bucket tokens,
 * `addTask()` only enqueues on the queue.
 */
class AdaptiveRateLimiter {
  private bucket: TokenBucket;
  private queue: PQueue;

  /**
   * @param requestsPerMinute Bucket capacity; also sets the refill rate to
   *   `requestsPerMinute / 60` tokens per second.
   * @param maxConcurrent Concurrency handed to the queue.
   */
  constructor(
    private requestsPerMinute: number,
    private maxConcurrent: number
  ) {
    const perSecond = requestsPerMinute / 60;
    this.bucket = {
      tokens: requestsPerMinute,
      maxTokens: requestsPerMinute,
      refillRate: perSecond,
      lastRefill: Date.now(),
    };
    this.queue = new PQueue({
      concurrency: maxConcurrent,
      interval: 60000,
      carryoverConcurrencyCount: true,
    });
  }

  /** Credits tokens for the time elapsed since the last refill, up to the cap. */
  private refill(): void {
    const { bucket } = this;
    const now = Date.now();
    const elapsedSeconds = (now - bucket.lastRefill) / 1000;
    const replenished = bucket.tokens + elapsedSeconds * bucket.refillRate;
    bucket.tokens = Math.min(bucket.maxTokens, replenished);
    bucket.lastRefill = now;
  }

  /**
   * Resolves once one token has been taken from the bucket, re-checking
   * every 100 ms while the bucket is empty.
   */
  async acquire(): Promise<void> {
    for (;;) {
      this.refill();
      if (this.bucket.tokens >= 1) {
        this.bucket.tokens--;
        return;
      }
      await new Promise<void>((wake) => setTimeout(wake, 100));
    }
  }

  /** Enqueues `fn` on the concurrency-limited queue and returns its result. */
  addTask<T>(fn: () => Promise<T>): Promise<T> {
    return this.queue.add(fn);
  }

  /** Snapshot of whole tokens available plus the queue's size and pending counts. */
  getStatus() {
    const { size, pending } = this.queue;
    return {
      availableTokens: Math.floor(this.bucket.tokens),
      queueSize: size,
      pending,
    };
  }
}

// Batch Processing for Cost Optimization
/**
 * Collects individual chat requests and sends them to the model as one
 * combined prompt to reduce the number of API calls.
 *
 * A batch is flushed when `batchSize` requests are buffered or, at the
 * latest, on the next `maxWaitTime` tick. Only one batch is in flight at a
 * time. Call {@link stop} when the processor is no longer needed; otherwise
 * its interval timer keeps running.
 */
class BatchProcessor {
  private buffer: Array<{
    messages: Array<{ role: string; content: string }>;
    resolve: (value: string) => void;
    reject: (error: Error) => void;
  }> = [];
  private processing = false;
  private batchSize = 10;
  private maxWaitTime = 1000; // ms
  private readonly flushTimer: ReturnType<typeof setInterval>;

  /**
   * @param aiClient Client used to send the combined prompt.
   * @param limiter Rate limiter; one token is acquired per batch.
   */
  constructor(
    private aiClient: HolySheepAIClient,
    private limiter: AdaptiveRateLimiter
  ) {
    this.flushTimer = setInterval(() => void this.processBatch(), this.maxWaitTime);
  }

  /**
   * Queues `messages` for the next batch.
   *
   * @returns The answer extracted for this request from the batched reply.
   * @throws When the batch this request was part of fails, or when the
   *   processor is stopped before the request was sent.
   */
  async request(
    messages: Array<{ role: string; content: string }>
  ): Promise<string> {
    return new Promise((resolve, reject) => {
      this.buffer.push({ messages, resolve, reject });
      if (this.buffer.length >= this.batchSize) {
        void this.processBatch();
      }
    });
  }

  /**
   * Stops the periodic flush and rejects every request that is still
   * buffered, so callers are not left waiting forever. A batch that is
   * already in flight is not affected.
   */
  stop(): void {
    clearInterval(this.flushTimer);
    const abandoned = this.buffer.splice(0, this.buffer.length);
    if (abandoned.length === 0) return;
    const reason = new Error('BatchProcessor stopped before the request was sent');
    abandoned.forEach((item) => item.reject(reason));
  }

  /** Sends up to `batchSize` buffered requests as one prompt and settles them. */
  private async processBatch(): Promise<void> {
    if (this.processing || this.buffer.length === 0) return;
    this.processing = true;

    const batch = this.buffer.splice(0, this.batchSize);

    try {
      // Inside the try so a limiter failure rejects the batch and still
      // clears `processing` instead of wedging the processor.
      await this.limiter.acquire();

      const separator = `||| ANSWER ${batch.length} |||`;
      const combinedMessages = batch.map((item, idx) => ({
        role: 'user' as const,
        content: `[Request ${idx}]: ${item.messages.map((m) => m.content).join('\n')}`,
      }));

      const response = await this.aiClient.chat({
        model: 'gpt-4.1',
        messages: [
          {
            role: 'system',
            content: `You are processing ${batch.length} requests. Respond with ${batch.length} answers separated by "${separator}".`,
          },
          ...combinedMessages,
        ],
      });

      const answers = response.choices[0].message.content.split(separator);
      batch.forEach((item, idx) => {
        // NOTE(review): when the model returns fewer answers than requests,
        // a request falls back to the FIRST answer, which belongs to a
        // different request. Kept as-is; confirm this is intended.
        item.resolve(answers[idx]?.trim() || answers[0]?.trim() || '');
      });
    } catch (error) {
      const failure = error instanceof Error ? error : new Error(String(error));
      batch.forEach((item) => item.reject(failure));
    } finally {
      this.processing = false;
      // Requests that filled another batch while this one was in flight
      // should not have to wait for the next timer tick.
      if (this.buffer.length >= this.batchSize) {
        void this.processBatch();
      }
    }
  }
}

export { AdaptiveRateLimiter, BatchProcessor };

4. Caching Strategy สำหรับ Cost Optimization

การ cache response อย่างถูกต้องช่วยลดค่าใช้จ่ายได้อย่างมาก และยิ่งประหยัดขึ้นอีกเมื่อใช้ร่วมกับ HolySheep AI ที่ช่วยประหยัดค่าใช้จ่ายได้ถึง 85%
// intelligent-cache.ts - Semantic Cache with Redis
import Redis from 'ioredis';
import crypto from 'crypto';

/**
 * Record stored per cache key.
 * NOTE(review): the code that reads and writes these entries is not visible
 * here; the field notes below are inferred from the names — confirm.
 */
interface CacheEntry {
  /** Presumably the cached model response text. */
  response: string;
  /** Presumably when the entry was stored (units not shown — verify). */
  timestamp: number;
  /** Presumably how many times this entry has been served. */
  hitCount: number;
}

class SemanticCache {
  private redis: Redis;
  private embeddingCache: Redis;
  private similarityThreshold = 0.95;

  constructor(redisUrl: string) {
    this.redis = new Redis(redisUrl);
    this.embeddingCache = new Redis(redisUrl);
  }

  private generateHash(messages: Array<{ role: string; content: string }>): string {
    const normalized = messages
      .map((m) => m.content.toLowerCase().trim())
      .join('|');
    return crypto.createHash('sha256').update(normalized).digest('hex').slice(0, 16);
  }

  async get(
    messages: Array<{ role: string; content: string }>
  ): Promise<string | null> {
    const cacheKey = ai:cache:${this.generateHash(messages)};
    const cached = await