Documentation
Overview ¶
Package gollama provides Go bindings for llama.cpp using purego. This package allows you to use llama.cpp functionality from Go without CGO.
The bindings are designed to be as close to the original llama.cpp C API as possible, while providing Go-friendly interfaces where appropriate.
Example usage:
// Initialize the library
if err := gollama.Backend_init(); err != nil {
log.Fatal(err)
}
defer gollama.Backend_free()
// Load a model
params := gollama.Model_default_params()
model, err := gollama.Model_load_from_file("model.gguf", params)
if err != nil {
log.Fatal(err)
}
defer gollama.Model_free(model)
// Create context and generate text
ctxParams := gollama.Context_default_params()
ctx, err := gollama.Init_from_model(model, ctxParams)
if err != nil {
log.Fatal(err)
}
defer gollama.Free(ctx)
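The fragment below extends the example into a minimal generation loop. It is a sketch only: it reuses the model and ctx values from above, and the prompt, the 64-token budget, and greedy sampling are illustrative choices rather than requirements of the API.
// Tokenize and evaluate a prompt
tokens, err := gollama.Tokenize(model, "Once upon a time", true, false)
if err != nil {
log.Fatal(err)
}
if err := gollama.Decode(ctx, gollama.Batch_get_one(tokens)); err != nil {
log.Fatal(err)
}
// Sample new tokens greedily, feeding each one back in
sampler := gollama.Sampler_init_greedy()
defer gollama.Sampler_free(sampler)
for i := 0; i < 64; i++ {
token := gollama.Sampler_sample(sampler, ctx, -1)
fmt.Print(gollama.Token_to_piece(model, token, false))
if err := gollama.Decode(ctx, gollama.Batch_get_one([]gollama.LlamaToken{token})); err != nil {
log.Fatal(err)
}
}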
Index ¶
- Constants
- Variables
- func ApplyConfig(config *Config) error
- func Backend_free()
- func Backend_init() error
- func Batch_free(batch LlamaBatch)
- func CheckResult(result int, function string) error
- func CleanLibraryCache() error
- func Cleanup()
- func Decode(ctx LlamaContext, batch LlamaBatch) error
- func ErrorFromCode(code int) error
- func Free(ctx LlamaContext)
- func GetConfigPath() string
- func GetSHA256ForFile(filepath string) (string, error)
- func Get_embeddings(ctx LlamaContext) *float32
- func Get_embeddings_ith(ctx LlamaContext, i int32) *float32
- func Get_logits(ctx LlamaContext) *float32
- func Get_logits_ith(ctx LlamaContext, i int32) *float32
- func HandleError(err error, context string) error
- func IsFatalError(err error) bool
- func IsRetryableError(err error) bool
- func LoadLibraryWithVersion(version string) error
- func Max_devices() uint64
- func Memory_clear(ctx LlamaContext, reset bool) bool
- func Model_free(model LlamaModel)
- func Model_n_embd(model LlamaModel) int32
- func Must(err error)
- func Print_system_info() string
- func RegisterFunction(fptr interface{}, name string) error
- func Sampler_free(sampler LlamaSampler)
- func SetGlobalConfig(config *Config) error
- func SetGlobalErrorHandler(handler *ErrorHandler)
- func Set_causal_attn(ctx LlamaContext, causal bool)
- func Set_embeddings(ctx LlamaContext, embeddings bool)
- func Supports_gpu_offload() bool
- func Supports_mlock() bool
- func Supports_mmap() bool
- func Token_to_piece(model LlamaModel, token LlamaToken, special bool) string
- func Try(fn func() error) (err error)
- func WrapError(err error, message string) error
- func WrapErrorf(err error, format string, args ...interface{}) error
- type Config
- type DownloadResult
- type DownloadTask
- type ErrorCategory
- type ErrorHandler
- type LibraryDownloader
- func (d *LibraryDownloader) CleanCache() error
- func (d *LibraryDownloader) DownloadAndExtract(downloadURL, filename string) (string, error)
- func (d *LibraryDownloader) DownloadAndExtractWithChecksum(downloadURL, filename, expectedChecksum string) (string, string, error)
- func (d *LibraryDownloader) DownloadMultiplePlatforms(platforms []string, version string) ([]DownloadResult, error)
- func (d *LibraryDownloader) FindAssetByPattern(release *ReleaseInfo, pattern string) (string, string, error)
- func (d *LibraryDownloader) FindLibraryPath(extractedDir string) (string, error)
- func (d *LibraryDownloader) FindLibraryPathForPlatform(extractedDir, goos string) (string, error)
- func (d *LibraryDownloader) GetLatestRelease() (*ReleaseInfo, error)
- func (d *LibraryDownloader) GetPlatformAssetPattern() (string, error)
- func (d *LibraryDownloader) GetPlatformAssetPatternForPlatform(goos, goarch string) (string, error)
- func (d *LibraryDownloader) GetReleaseByTag(tag string) (*ReleaseInfo, error)
- type LibraryLoader
- type LlamaAdapterLora
- type LlamaAttentionType
- type LlamaBatch
- type LlamaChatMessage
- type LlamaContext
- type LlamaContextParams
- type LlamaError
- type LlamaFtype
- type LlamaGpuBackend
- type LlamaLogitBias
- type LlamaMemory
- type LlamaModel
- type LlamaModelParams
- type LlamaModelQuantizeParams
- type LlamaPoolingType
- type LlamaPos
- type LlamaRopeScalingType
- type LlamaSampler
- type LlamaSamplerChainParams
- type LlamaSeqId
- type LlamaSplitMode
- type LlamaToken
- type LlamaTokenAttr
- type LlamaTokenData
- type LlamaTokenDataArray
- type LlamaTokenType
- type LlamaVocab
- type LlamaVocabType
- type ReleaseInfo
Constants ¶
const (
LLAMA_ERR_SUCCESS = 0
LLAMA_ERR_FAIL = -1
LLAMA_ERR_INVALID_PARAM = -2
LLAMA_ERR_OUT_OF_MEMORY = -3
LLAMA_ERR_FILE_NOT_FOUND = -4
LLAMA_ERR_FILE_READ = -5
LLAMA_ERR_FILE_WRITE = -6
LLAMA_ERR_INVALID_FORMAT = -7
LLAMA_ERR_UNSUPPORTED = -8
LLAMA_ERR_BACKEND_INIT = -9
LLAMA_ERR_CONTEXT_FULL = -10
LLAMA_ERR_TOKEN_INVALID = -11
LLAMA_ERR_MODEL_CORRUPTED = -12
LLAMA_ERR_GPU_UNAVAILABLE = -13
)
Error codes matching llama.cpp return values
const (
// Version is the gollama.cpp version
Version = "0.2.0"
// LlamaCppBuild is the llama.cpp build number this version is based on
LlamaCppBuild = "b6099"
// FullVersion combines both version numbers
FullVersion = "v" + Version + "-llamacpp." + LlamaCppBuild
)
Version information
const (
LLAMA_DEFAULT_SEED = 0xFFFFFFFF
LLAMA_TOKEN_NULL = -1
// File magic numbers
LLAMA_FILE_MAGIC_GGLA = 0x67676c61
LLAMA_FILE_MAGIC_GGSN = 0x6767736e
LLAMA_FILE_MAGIC_GGSQ = 0x67677371
// Session constants
LLAMA_SESSION_MAGIC = LLAMA_FILE_MAGIC_GGSN
LLAMA_SESSION_VERSION = 9
LLAMA_STATE_SEQ_MAGIC = LLAMA_FILE_MAGIC_GGSQ
LLAMA_STATE_SEQ_VERSION = 2
)
Constants from llama.h
Variables ¶
var (
// Library errors
ErrLibraryNotLoaded = errors.New("llama.cpp library not loaded")
ErrLibraryLoadFailed = errors.New("failed to load llama.cpp library")
ErrFunctionNotFound = errors.New("function not found in library")
ErrInvalidLibraryPath = errors.New("invalid library path")
// Model errors
ErrModelNotLoaded = errors.New("model not loaded")
ErrModelLoadFailed = errors.New("failed to load model")
ErrModelSaveFailed = errors.New("failed to save model")
ErrInvalidModelPath = errors.New("invalid model path")
ErrModelCorrupted = errors.New("model file corrupted")
ErrUnsupportedModelType = errors.New("unsupported model type")
// Context errors
ErrContextNotCreated = errors.New("context not created")
ErrContextCreationFailed = errors.New("failed to create context")
ErrInvalidContextSize = errors.New("invalid context size")
ErrContextFull = errors.New("context is full")
// Token errors
ErrTokenizationFailed = errors.New("tokenization failed")
ErrInvalidToken = errors.New("invalid token")
ErrTokenOutOfRange = errors.New("token out of vocabulary range")
// Generation errors
ErrGenerationFailed = errors.New("text generation failed")
ErrSamplingFailed = errors.New("token sampling failed")
ErrInvalidSamplingParams = errors.New("invalid sampling parameters")
// Memory errors
ErrOutOfMemory = errors.New("out of memory")
ErrMemoryAllocationFailed = errors.New("memory allocation failed")
ErrInvalidMemorySize = errors.New("invalid memory size")
// Configuration errors
ErrInvalidConfig = errors.New("invalid configuration")
ErrConfigValidationFailed = errors.New("configuration validation failed")
ErrUnsupportedPlatform = errors.New("unsupported platform")
// Backend errors
ErrBackendNotAvailable = errors.New("backend not available")
ErrBackendInitFailed = errors.New("backend initialization failed")
ErrGPUNotAvailable = errors.New("GPU not available")
ErrCUDANotAvailable = errors.New("CUDA not available")
ErrMetalNotAvailable = errors.New("metal backend not available")
ErrVulkanNotAvailable = errors.New("vulkan backend not available")
// File I/O errors
ErrFileNotFound = errors.New("file not found")
ErrFileReadFailed = errors.New("failed to read file")
ErrFileWriteFailed = errors.New("failed to write file")
ErrInvalidFileFormat = errors.New("invalid file format")
// Parameter errors
ErrInvalidParameter = errors.New("invalid parameter")
ErrParameterOutOfRange = errors.New("parameter out of range")
ErrMissingParameter = errors.New("missing required parameter")
// Thread/concurrency errors
ErrThreadingFailed = errors.New("threading operation failed")
ErrConcurrencyViolation = errors.New("concurrency violation")
ErrDeadlock = errors.New("deadlock detected")
)
Error types for different categories of errors
Functions ¶
func ApplyConfig ¶
func ApplyConfig(config *Config) error
ApplyConfig applies the configuration to the library
func CheckResult ¶
func CheckResult(result int, function string) error
CheckResult checks a result code and returns an appropriate error
func CleanLibraryCache ¶
func CleanLibraryCache() error
CleanLibraryCache removes cached library files to force re-download
func ErrorFromCode ¶
func ErrorFromCode(code int) error
ErrorFromCode converts a llama.cpp error code to a Go error
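A sketch of translating raw llama.cpp status codes into Go errors with ErrorFromCode and the related CheckResult helper; the code values fed in here are arbitrary examples, not values you would normally construct by hand.
// Check a raw status code, attaching the calling function's name
if err := gollama.CheckResult(gollama.LLAMA_ERR_OUT_OF_MEMORY, "llama_decode"); err != nil {
log.Println(err)
}
// ErrorFromCode performs the same translation without extra context
err := gollama.ErrorFromCode(gollama.LLAMA_ERR_FILE_NOT_FOUND)
log.Println(err)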
func GetConfigPath ¶
func GetConfigPath() string
GetConfigPath returns the default configuration file path
func GetSHA256ForFile ¶
func GetSHA256ForFile(filepath string) (string, error)
GetSHA256ForFile calculates the SHA256 checksum for a given file
func Get_embeddings ¶
func Get_embeddings(ctx LlamaContext) *float32
Get_embeddings returns the embeddings for the context
func Get_embeddings_ith ¶
func Get_embeddings_ith(ctx LlamaContext, i int32) *float32
Get_embeddings_ith returns the embeddings for the ith sequence in the context
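The returned pointer refers to a buffer owned by the context. A hedged way to view it as a Go slice, assuming the per-sequence embedding length equals Model_n_embd, is sketched below (requires the unsafe package).
// Enable embedding extraction, decode a batch, then read the result
gollama.Set_embeddings(ctx, true)
// ... call Decode(ctx, batch) here ...
nEmbd := gollama.Model_n_embd(model)
if ptr := gollama.Get_embeddings_ith(ctx, 0); ptr != nil {
embedding := unsafe.Slice(ptr, int(nEmbd)) // buffer length is an assumption, see above
fmt.Println(len(embedding))
}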
func Get_logits_ith ¶
func Get_logits_ith(ctx LlamaContext, i int32) *float32
Get_logits_ith gets logits for a specific token
func HandleError ¶
func HandleError(err error, context string) error
HandleError is a convenience function that uses the global error handler
func IsFatalError ¶
func IsFatalError(err error) bool
IsFatalError checks if an error is fatal and should stop execution
func IsRetryableError ¶
func IsRetryableError(err error) bool
IsRetryableError checks if an error is retryable
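A sketch of a retry policy built on these predicates; the three-attempt budget and the Decode call stand in for whatever operation is being retried.
var err error
for attempt := 0; attempt < 3; attempt++ {
if err = gollama.Decode(ctx, batch); err == nil {
break
}
if gollama.IsFatalError(err) || !gollama.IsRetryableError(err) {
break // do not retry fatal or non-retryable failures
}
}
if err != nil {
log.Fatal(gollama.WrapError(err, "decode failed after retries"))
}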
func LoadLibraryWithVersion ¶
func LoadLibraryWithVersion(version string) error
LoadLibraryWithVersion loads a specific version of the llama.cpp library
func Memory_clear ¶
func Memory_clear(ctx LlamaContext, reset bool) bool
Memory_clear clears the KV cache
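For example, resetting the KV cache between independent prompts:
if !gollama.Memory_clear(ctx, true) {
log.Println("failed to clear KV cache")
}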
func Model_n_embd ¶
func Model_n_embd(model LlamaModel) int32
Model_n_embd returns the number of embedding dimensions for the model
func Print_system_info ¶
func Print_system_info() string
Print_system_info prints system information
func RegisterFunction ¶
func RegisterFunction(fptr interface{}, name string) error
RegisterFunction registers a function with the global library handle
func SetGlobalConfig ¶
func SetGlobalConfig(config *Config) error
SetGlobalConfig sets the global configuration
func SetGlobalErrorHandler ¶
func SetGlobalErrorHandler(handler *ErrorHandler)
SetGlobalErrorHandler sets the global error handler
func Set_causal_attn ¶
func Set_causal_attn(ctx LlamaContext, causal bool)
Set_causal_attn sets whether to use causal attention
func Set_embeddings ¶
func Set_embeddings(ctx LlamaContext, embeddings bool)
Set_embeddings sets whether to extract embeddings
func Supports_gpu_offload ¶
func Supports_gpu_offload() bool
Supports_gpu_offload returns whether GPU offload is supported
func Token_to_piece ¶
func Token_to_piece(model LlamaModel, token LlamaToken, special bool) string
Token_to_piece converts a token to its string representation using the model's vocabulary
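For example, detokenizing a slice of tokens back into text (assuming tokens came from an earlier Tokenize call):
var sb strings.Builder
for _, tok := range tokens {
sb.WriteString(gollama.Token_to_piece(model, tok, false))
}
fmt.Println(sb.String())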
func WrapErrorf ¶
func WrapErrorf(err error, format string, args ...interface{}) error
WrapErrorf wraps an error with formatted additional context
Types ¶
type Config ¶
type Config struct {
// Library settings
LibraryPath string `json:"library_path,omitempty"`
UseEmbedded bool `json:"use_embedded"`
EnableLogging bool `json:"enable_logging"`
LogLevel int `json:"log_level"`
// Performance settings
NumThreads int `json:"num_threads"`
EnableGPU bool `json:"enable_gpu"`
GPULayers int `json:"gpu_layers"`
MetalEnabled bool `json:"metal_enabled"`
CUDAEnabled bool `json:"cuda_enabled"`
VulkanEnabled bool `json:"vulkan_enabled"`
// Memory settings
ContextSize int `json:"context_size"`
BatchSize int `json:"batch_size"`
UbatchSize int `json:"ubatch_size"`
MemoryMapEnabled bool `json:"memory_map_enabled"`
MemoryLockEnabled bool `json:"memory_lock_enabled"`
// Model settings
ModelPath string `json:"model_path,omitempty"`
VocabOnly bool `json:"vocab_only"`
UseQuantization bool `json:"use_quantization"`
QuantizationType string `json:"quantization_type,omitempty"`
// Backend settings
BackendType string `json:"backend_type,omitempty"`
DeviceID int `json:"device_id"`
// Debug settings
VerboseLogging bool `json:"verbose_logging"`
DebugMode bool `json:"debug_mode"`
}
Config holds configuration options for gollama
func DefaultConfig ¶
func DefaultConfig() *Config
DefaultConfig returns a configuration with sensible defaults
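A sketch of adjusting the defaults and applying them globally; the field values are illustrative, not recommendations.
cfg := gollama.DefaultConfig()
cfg.NumThreads = 8
cfg.EnableGPU = true
cfg.GPULayers = 32
cfg.ContextSize = 4096
if err := gollama.ApplyConfig(cfg); err != nil {
log.Fatal(err)
}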
func GetGlobalConfig ¶
func GetGlobalConfig() *Config
GetGlobalConfig returns the global configuration
func LoadConfig ¶
LoadConfig loads configuration from a JSON file
func LoadConfigFromEnv ¶
func LoadConfigFromEnv() *Config
LoadConfigFromEnv loads configuration from environment variables
func LoadDefaultConfig ¶
func LoadDefaultConfig() *Config
LoadDefaultConfig loads configuration from the default locations
func (*Config) SaveConfig ¶
SaveConfig saves configuration to a JSON file
type DownloadResult ¶
type DownloadResult struct {
Platform string
Success bool
Error error
LibraryPath string
SHA256Sum string
}
DownloadResult represents the result of a download task
func DownloadLibrariesForPlatforms ¶
func DownloadLibrariesForPlatforms(platforms []string, version string) ([]DownloadResult, error)
DownloadLibrariesForPlatforms downloads libraries for multiple platforms in parallel. Platforms should be in the format []string{"linux/amd64", "darwin/arm64", "windows/amd64"}. The version can be empty for the latest release, or name a specific version like "b6099".
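For example, prefetching binaries for a release build (the platform list and version are illustrative):
results, err := gollama.DownloadLibrariesForPlatforms([]string{"linux/amd64", "darwin/arm64", "windows/amd64"}, "b6099")
if err != nil {
log.Fatal(err)
}
for _, r := range results {
if !r.Success {
log.Printf("%s failed: %v", r.Platform, r.Error)
continue
}
log.Printf("%s: %s (sha256 %s)", r.Platform, r.LibraryPath, r.SHA256Sum)
}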
type DownloadTask ¶
type DownloadTask struct {
Platform string
AssetName string
DownloadURL string
TargetDir string
ExpectedSHA2 string
}
DownloadTask represents a single download task for parallel processing
type ErrorCategory ¶
type ErrorCategory int
ErrorCategory represents different categories of errors
const (
CategoryLibrary ErrorCategory = iota
CategoryModel
CategoryContext
CategoryToken
CategoryGeneration
CategoryMemory
CategoryConfig
CategoryBackend
CategoryFile
CategoryParameter
CategoryThread
)
func CategorizeError ¶
func CategorizeError(err error) ErrorCategory
CategorizeError determines the category of an error
func (ErrorCategory) String ¶
func (ec ErrorCategory) String() string
String returns the string representation of an error category
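A sketch of routing an already-obtained error by category:
cat := gollama.CategorizeError(err)
switch cat {
case gollama.CategoryMemory:
log.Println("out of memory: reduce context or batch size")
case gollama.CategoryBackend:
log.Println("backend problem: check GPU availability")
default:
log.Println("error category:", cat.String())
}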
type ErrorHandler ¶
type ErrorHandler struct {
// contains filtered or unexported fields
}
ErrorHandler provides centralized error handling and logging
func GetGlobalErrorHandler ¶
func GetGlobalErrorHandler() *ErrorHandler
GetGlobalErrorHandler returns the global error handler
func NewErrorHandler ¶
func NewErrorHandler(enableLogging bool) *ErrorHandler
NewErrorHandler creates a new error handler
func (*ErrorHandler) HandleError ¶
func (eh *ErrorHandler) HandleError(err error, context string) error
HandleError processes and logs an error
func (*ErrorHandler) SetLogCallback ¶
func (eh *ErrorHandler) SetLogCallback(callback func(level int, message string))
SetLogCallback sets the log callback function
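A sketch of installing a custom handler with a log callback; the callback body and the Decode call are placeholders.
handler := gollama.NewErrorHandler(true)
handler.SetLogCallback(func(level int, message string) {
log.Printf("[gollama level=%d] %s", level, message)
})
gollama.SetGlobalErrorHandler(handler)
// Route failures through the handler with extra context
if err := gollama.Decode(ctx, batch); err != nil {
_ = handler.HandleError(err, "decode")
}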
type LibraryDownloader ¶
type LibraryDownloader struct {
// contains filtered or unexported fields
}
LibraryDownloader handles downloading pre-built llama.cpp binaries
func NewLibraryDownloader ¶
func NewLibraryDownloader() (*LibraryDownloader, error)
NewLibraryDownloader creates a new library downloader instance
func (*LibraryDownloader) CleanCache ¶
func (d *LibraryDownloader) CleanCache() error
CleanCache removes old cached library files
func (*LibraryDownloader) DownloadAndExtract ¶
func (d *LibraryDownloader) DownloadAndExtract(downloadURL, filename string) (string, error)
DownloadAndExtract downloads and extracts the library archive
func (*LibraryDownloader) DownloadAndExtractWithChecksum ¶
func (d *LibraryDownloader) DownloadAndExtractWithChecksum(downloadURL, filename, expectedChecksum string) (string, string, error)
DownloadAndExtractWithChecksum downloads and extracts the library archive with checksum verification
func (*LibraryDownloader) DownloadMultiplePlatforms ¶
func (d *LibraryDownloader) DownloadMultiplePlatforms(platforms []string, version string) ([]DownloadResult, error)
DownloadMultiplePlatforms downloads libraries for multiple platforms in parallel
func (*LibraryDownloader) FindAssetByPattern ¶
func (d *LibraryDownloader) FindAssetByPattern(release *ReleaseInfo, pattern string) (string, string, error)
FindAssetByPattern finds an asset that matches the given pattern
func (*LibraryDownloader) FindLibraryPath ¶
func (d *LibraryDownloader) FindLibraryPath(extractedDir string) (string, error)
FindLibraryPath finds the main library file in the extracted directory
func (*LibraryDownloader) FindLibraryPathForPlatform ¶
func (d *LibraryDownloader) FindLibraryPathForPlatform(extractedDir, goos string) (string, error)
FindLibraryPathForPlatform finds the main library file for a specific platform
func (*LibraryDownloader) GetLatestRelease ¶
func (d *LibraryDownloader) GetLatestRelease() (*ReleaseInfo, error)
GetLatestRelease fetches the latest release information from GitHub
func (*LibraryDownloader) GetPlatformAssetPattern ¶
func (d *LibraryDownloader) GetPlatformAssetPattern() (string, error)
GetPlatformAssetPattern returns the asset name pattern for the current platform
func (*LibraryDownloader) GetPlatformAssetPatternForPlatform ¶
func (d *LibraryDownloader) GetPlatformAssetPatternForPlatform(goos, goarch string) (string, error)
GetPlatformAssetPatternForPlatform returns the asset name pattern for a specific platform
func (*LibraryDownloader) GetReleaseByTag ¶
func (d *LibraryDownloader) GetReleaseByTag(tag string) (*ReleaseInfo, error)
GetReleaseByTag fetches release information for a specific tag
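A typical download flow, sketched under two assumptions: the latest release carries an asset matching the current platform pattern, and FindAssetByPattern returns the asset name followed by its download URL.
dl, err := gollama.NewLibraryDownloader()
if err != nil {
log.Fatal(err)
}
release, err := dl.GetLatestRelease()
if err != nil {
log.Fatal(err)
}
pattern, err := dl.GetPlatformAssetPattern()
if err != nil {
log.Fatal(err)
}
name, url, err := dl.FindAssetByPattern(release, pattern) // return order is an assumption
if err != nil {
log.Fatal(err)
}
extractedDir, err := dl.DownloadAndExtract(url, name)
if err != nil {
log.Fatal(err)
}
libPath, err := dl.FindLibraryPath(extractedDir)
if err != nil {
log.Fatal(err)
}
log.Println("library extracted to", libPath)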
type LibraryLoader ¶
type LibraryLoader struct {
// contains filtered or unexported fields
}
LibraryLoader manages the loading and lifecycle of llama.cpp shared libraries
func (*LibraryLoader) GetHandle ¶
func (l *LibraryLoader) GetHandle() uintptr
GetHandle returns the library handle
func (*LibraryLoader) IsLoaded ¶
func (l *LibraryLoader) IsLoaded() bool
IsLoaded returns whether the library is loaded
func (*LibraryLoader) LoadLibrary ¶
func (l *LibraryLoader) LoadLibrary() error
LoadLibrary loads the appropriate llama.cpp library for the current platform
func (*LibraryLoader) LoadLibraryWithVersion ¶
func (l *LibraryLoader) LoadLibraryWithVersion(version string) error
LoadLibraryWithVersion loads the llama.cpp library for a specific version. If version is empty, it loads the latest version.
func (*LibraryLoader) UnloadLibrary ¶
func (l *LibraryLoader) UnloadLibrary() error
UnloadLibrary unloads the library and cleans up resources
type LlamaAdapterLora ¶
type LlamaAdapterLora uintptr
type LlamaAttentionType ¶
type LlamaAttentionType int32
const (
LLAMA_ATTENTION_TYPE_CAUSAL LlamaAttentionType = 0
LLAMA_ATTENTION_TYPE_NON_CAUSAL LlamaAttentionType = 1
)
type LlamaBatch ¶
type LlamaBatch struct {
NTokens int32 // number of tokens
Token *LlamaToken // tokens
Embd *float32 // embeddings (if using embeddings instead of tokens)
Pos *LlamaPos // positions
NSeqId *int32 // number of sequence IDs per token
SeqId **LlamaSeqId // sequence IDs
Logits *int8 // whether to compute logits for each token
}
func Batch_get_one ¶
func Batch_get_one(tokens []LlamaToken) LlamaBatch
Batch_get_one creates a batch from a single set of tokens
func Batch_init ¶
func Batch_init(nTokens, embd, nSeqMax int32) LlamaBatch
Batch_init creates a new batch
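For example, evaluating a prompt with a one-shot batch; Batch_init is the alternative when the batch fields need to be managed manually, in which case the batch must be released with Batch_free.
tokens, err := gollama.Tokenize(model, "The capital of France is", true, false)
if err != nil {
log.Fatal(err)
}
if err := gollama.Decode(ctx, gollama.Batch_get_one(tokens)); err != nil {
log.Fatal(err)
}
// Manually managed batch (no embeddings, one sequence)
manual := gollama.Batch_init(int32(len(tokens)), 0, 1)
defer gollama.Batch_free(manual)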
type LlamaChatMessage ¶
Chat message
type LlamaContext ¶
type LlamaContext uintptr
func Init_from_model ¶
func Init_from_model(model LlamaModel, params LlamaContextParams) (LlamaContext, error)
Init_from_model creates a context from a model
type LlamaContextParams ¶
type LlamaContextParams struct {
Seed uint32 // RNG seed, LLAMA_DEFAULT_SEED (0xFFFFFFFF) for random
NCtx uint32 // text context, 0 = from model
NBatch uint32 // logical maximum batch size
NUbatch uint32 // physical maximum batch size
NSeqMax uint32 // max number of sequences
NThreads int32 // number of threads to use for generation
NThreadsBatch int32 // number of threads to use for batch processing
RopeScalingType LlamaRopeScalingType // RoPE scaling type
PoolingType LlamaPoolingType // pooling type for embeddings
AttentionType LlamaAttentionType // attention type
RopeFreqBase float32 // RoPE base frequency
RopeFreqScale float32 // RoPE frequency scaling factor
YarnExtFactor float32 // YaRN extrapolation mix factor
YarnAttnFactor float32 // YaRN magnitude scaling factor
YarnBetaFast float32 // YaRN low correction dim
YarnBetaSlow float32 // YaRN high correction dim
YarnOrigCtx uint32 // YaRN original context size
DefragThold float32 // defragment the KV cache if holes/size > thold
CbEval uintptr // evaluation callback
CbEvalUserData uintptr // user data for evaluation callback
TypeK int32 // data type for K cache
TypeV int32 // data type for V cache
AbortCallback uintptr // abort callback
AbortCallbackData uintptr // user data for abort callback
Logits uint8 // whether to compute and return logits (bool as uint8)
Embeddings uint8 // whether to compute and return embeddings (bool as uint8)
Offload_kqv uint8 // whether to offload K, Q, V to GPU (bool as uint8)
FlashAttn uint8 // whether to use flash attention (bool as uint8)
NoPerf uint8 // whether to skip performance measurements (bool as uint8)
}
Context parameters
func ContextDefaultParams ¶
func ContextDefaultParams() LlamaContextParams
func Context_default_params ¶
func Context_default_params() LlamaContextParams
Context_default_params returns default context parameters
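A sketch of overriding a few defaults before creating a context; the values are illustrative.
ctxParams := gollama.Context_default_params()
ctxParams.NCtx = 4096 // context window
ctxParams.NBatch = 512 // logical batch size
ctxParams.NThreads = 8
ctxParams.Embeddings = 1 // bool as uint8
ctx, err := gollama.Init_from_model(model, ctxParams)
if err != nil {
log.Fatal(err)
}
defer gollama.Free(ctx)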
type LlamaError ¶
type LlamaError struct {
Code int `json:"code"`
Message string `json:"message"`
Function string `json:"function,omitempty"`
File string `json:"file,omitempty"`
Line int `json:"line,omitempty"`
Cause error `json:"cause,omitempty"`
}
LlamaError represents a structured error from the llama.cpp library
func NewLlamaError ¶
func NewLlamaError(code int, message string) *LlamaError
NewLlamaError creates a new LlamaError
func NewLlamaErrorWithCause ¶
func NewLlamaErrorWithCause(code int, message string, cause error) *LlamaError
NewLlamaErrorWithCause creates a new LlamaError with an underlying cause
func NewLlamaErrorWithContext ¶
func NewLlamaErrorWithContext(code int, message, function string) *LlamaError
NewLlamaErrorWithContext creates a new LlamaError with function context
type LlamaFtype ¶
type LlamaFtype int32
const (
LLAMA_FTYPE_ALL_F32 LlamaFtype = 0
LLAMA_FTYPE_MOSTLY_F16 LlamaFtype = 1
LLAMA_FTYPE_MOSTLY_Q4_0 LlamaFtype = 2
LLAMA_FTYPE_MOSTLY_Q4_1 LlamaFtype = 3
LLAMA_FTYPE_MOSTLY_Q8_0 LlamaFtype = 7
LLAMA_FTYPE_MOSTLY_Q5_0 LlamaFtype = 8
LLAMA_FTYPE_MOSTLY_Q5_1 LlamaFtype = 9
LLAMA_FTYPE_MOSTLY_Q2_K LlamaFtype = 10
LLAMA_FTYPE_MOSTLY_Q3_K_S LlamaFtype = 11
LLAMA_FTYPE_MOSTLY_Q3_K_M LlamaFtype = 12
LLAMA_FTYPE_MOSTLY_Q3_K_L LlamaFtype = 13
LLAMA_FTYPE_MOSTLY_Q4_K_S LlamaFtype = 14
LLAMA_FTYPE_MOSTLY_Q4_K_M LlamaFtype = 15
LLAMA_FTYPE_MOSTLY_Q5_K_S LlamaFtype = 16
LLAMA_FTYPE_MOSTLY_Q5_K_M LlamaFtype = 17
LLAMA_FTYPE_MOSTLY_Q6_K LlamaFtype = 18
LLAMA_FTYPE_MOSTLY_IQ2_XXS LlamaFtype = 19
LLAMA_FTYPE_MOSTLY_IQ2_XS LlamaFtype = 20
LLAMA_FTYPE_MOSTLY_Q2_K_S LlamaFtype = 21
LLAMA_FTYPE_MOSTLY_IQ3_XS LlamaFtype = 22
)
type LlamaGpuBackend ¶
type LlamaGpuBackend int32
const (
LLAMA_GPU_BACKEND_NONE LlamaGpuBackend = 0
LLAMA_GPU_BACKEND_CPU LlamaGpuBackend = 1
LLAMA_GPU_BACKEND_CUDA LlamaGpuBackend = 2
LLAMA_GPU_BACKEND_METAL LlamaGpuBackend = 3
LLAMA_GPU_BACKEND_HIP LlamaGpuBackend = 4
LLAMA_GPU_BACKEND_VULKAN LlamaGpuBackend = 5
LLAMA_GPU_BACKEND_OPENCL LlamaGpuBackend = 6
LLAMA_GPU_BACKEND_SYCL LlamaGpuBackend = 7
)
func DetectGpuBackend ¶
func DetectGpuBackend() LlamaGpuBackend
DetectGpuBackend detects the available GPU backend on the current system
func (LlamaGpuBackend) String ¶
func (b LlamaGpuBackend) String() string
String returns the string representation of the GPU backend
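For example, gating GPU offload on what the running system reports; the layer count is illustrative.
backend := gollama.DetectGpuBackend()
log.Println("GPU backend:", backend.String())
params := gollama.Model_default_params()
if backend != gollama.LLAMA_GPU_BACKEND_NONE && gollama.Supports_gpu_offload() {
params.NGpuLayers = 99 // offload as many layers as fit
}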
type LlamaMemory ¶
type LlamaMemory uintptr
Common types matching llama.cpp
func Get_memory ¶
func Get_memory(ctx LlamaContext) LlamaMemory
Get_memory returns the memory handle for the context
type LlamaModel ¶
type LlamaModel uintptr
Opaque types (represented as pointers)
func Model_load_from_file ¶
func Model_load_from_file(pathModel string, params LlamaModelParams) (LlamaModel, error)
Model_load_from_file loads a model from a file
type LlamaModelParams ¶
type LlamaModelParams struct {
Devices uintptr // ggml_backend_dev_t * - NULL-terminated list of devices
TensorBuftOverrides uintptr // const struct llama_model_tensor_buft_override *
NGpuLayers int32 // number of layers to store in VRAM
SplitMode LlamaSplitMode // how to split the model across multiple GPUs
MainGpu int32 // the GPU that is used for the entire model
TensorSplit *float32 // proportion of the model to offload to each GPU
ProgressCallback uintptr // llama_progress_callback function pointer
ProgressCallbackUserData uintptr // context pointer passed to the progress callback
KvOverrides uintptr // const struct llama_model_kv_override *
VocabOnly uint8 // only load the vocabulary, no weights (bool as uint8)
UseMmap uint8 // use mmap if possible (bool as uint8)
UseMlock uint8 // force system to keep model in RAM (bool as uint8)
CheckTensors uint8 // validate model tensor data (bool as uint8)
UseExtraBufts uint8 // use extra buffer types (bool as uint8)
}
Model parameters
func ModelDefaultParams ¶
func ModelDefaultParams() LlamaModelParams
Helper functions for platforms where struct returns aren't supported
func Model_default_params ¶
func Model_default_params() LlamaModelParams
Model_default_params returns default model parameters
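A sketch of adjusting loading behaviour; the boolean fields are uint8, so 0 and 1 stand in for false and true.
params := gollama.Model_default_params()
params.UseMmap = 1 // map the file instead of reading it into memory
params.UseMlock = 0 // do not pin the model in RAM
params.VocabOnly = 0 // load weights, not just the vocabulary
model, err := gollama.Model_load_from_file("model.gguf", params)
if err != nil {
log.Fatal(err)
}
defer gollama.Model_free(model)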
type LlamaModelQuantizeParams ¶
type LlamaModelQuantizeParams struct {
NThread int32 // number of threads to use for quantizing
Ftype LlamaFtype // quantize to this llama_ftype
OutputTensorType int32 // output tensor type
TokenEmbeddingType int32 // token embeddings tensor type
AllowRequantize uint8 // allow quantizing non-f32/f16 tensors (bool as uint8)
QuantizeOutputTensor uint8 // quantize output.weight (bool as uint8)
OnlyF32 uint8 // quantize only f32 tensors (bool as uint8)
PureF16 uint8 // disable k-quant mixtures and quantize all tensors to the same type (bool as uint8)
KeepSplit uint8 // keep split tensors (bool as uint8)
IMatrix *byte // importance matrix data
KqsWarning uint8 // warning for quantization quality loss (bool as uint8)
}
Model quantize parameters
type LlamaPoolingType ¶
type LlamaPoolingType int32
const (
LLAMA_POOLING_TYPE_UNSPECIFIED LlamaPoolingType = -1
LLAMA_POOLING_TYPE_NONE LlamaPoolingType = 0
LLAMA_POOLING_TYPE_MEAN LlamaPoolingType = 1
LLAMA_POOLING_TYPE_CLS LlamaPoolingType = 2
LLAMA_POOLING_TYPE_LAST LlamaPoolingType = 3
LLAMA_POOLING_TYPE_RANK LlamaPoolingType = 4
)
type LlamaRopeScalingType ¶
type LlamaRopeScalingType int32
const (
LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED LlamaRopeScalingType = -1
LLAMA_ROPE_SCALING_TYPE_NONE LlamaRopeScalingType = 0
LLAMA_ROPE_SCALING_TYPE_LINEAR LlamaRopeScalingType = 1
LLAMA_ROPE_SCALING_TYPE_YARN LlamaRopeScalingType = 2
)
type LlamaSampler ¶
type LlamaSampler uintptr
func Sampler_init_greedy ¶
func Sampler_init_greedy() LlamaSampler
Sampler_init_greedy creates a greedy sampler
type LlamaSamplerChainParams ¶
type LlamaSamplerChainParams struct {
NoPerf uint8 // whether to skip performance timing measurements (bool as uint8)
}
Sampler chain parameters
func SamplerChainDefaultParams ¶
func SamplerChainDefaultParams() LlamaSamplerChainParams
func Sampler_chain_default_params ¶
func Sampler_chain_default_params() LlamaSamplerChainParams
Sampler_chain_default_params returns default sampler chain parameters
type LlamaSplitMode ¶
type LlamaSplitMode int32
const (
LLAMA_SPLIT_MODE_NONE LlamaSplitMode = 0
LLAMA_SPLIT_MODE_LAYER LlamaSplitMode = 1
LLAMA_SPLIT_MODE_ROW LlamaSplitMode = 2
)
type LlamaToken ¶
type LlamaToken int32
Common types matching llama.cpp
func Sampler_sample ¶
func Sampler_sample(sampler LlamaSampler, ctx LlamaContext, idx int32) LlamaToken
Sampler_sample samples a token from the logits at the given index (-1 for last token)
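For example, greedy decoding of a single next token after a Decode call; the index -1 selects the logits of the last evaluated token.
sampler := gollama.Sampler_init_greedy()
defer gollama.Sampler_free(sampler)
next := gollama.Sampler_sample(sampler, ctx, -1)
fmt.Println(gollama.Token_to_piece(model, next, false))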
func Tokenize ¶
func Tokenize(model LlamaModel, text string, addSpecial, parseSpecial bool) ([]LlamaToken, error)
Tokenize tokenizes text
type LlamaTokenAttr ¶
type LlamaTokenAttr int32
const (
LLAMA_TOKEN_ATTR_UNDEFINED LlamaTokenAttr = 0
LLAMA_TOKEN_ATTR_UNKNOWN LlamaTokenAttr = 1 << 0
LLAMA_TOKEN_ATTR_UNUSED LlamaTokenAttr = 1 << 1
LLAMA_TOKEN_ATTR_NORMAL LlamaTokenAttr = 1 << 2
LLAMA_TOKEN_ATTR_CONTROL LlamaTokenAttr = 1 << 3
LLAMA_TOKEN_ATTR_USER_DEF LlamaTokenAttr = 1 << 4
LLAMA_TOKEN_ATTR_BYTE LlamaTokenAttr = 1 << 5
LLAMA_TOKEN_ATTR_LSTRIP LlamaTokenAttr = 1 << 6
LLAMA_TOKEN_ATTR_RSTRIP LlamaTokenAttr = 1 << 7
LLAMA_TOKEN_ATTR_SINGLE_WORD LlamaTokenAttr = 1 << 8
)
type LlamaTokenData ¶
type LlamaTokenData struct {
Id LlamaToken // token id
Logit float32 // log-odds of the token
P float32 // probability of the token
}
Structs
type LlamaTokenDataArray ¶
type LlamaTokenDataArray struct {
Data *LlamaTokenData // pointer to token data array
Size uint64 // number of tokens
Selected int64 // index of selected token (-1 if none)
Sorted uint8 // whether the array is sorted by probability (bool as uint8)
}
func Token_data_array_from_logits ¶
func Token_data_array_from_logits(model LlamaModel, logits *float32) *LlamaTokenDataArray
Token_data_array_from_logits creates a token data array from logits
func Token_data_array_init ¶
func Token_data_array_init(model LlamaModel) *LlamaTokenDataArray
Token_data_array_init creates a token data array (helper function)
type LlamaTokenType ¶
type LlamaTokenType int32
const (
LLAMA_TOKEN_TYPE_UNDEFINED LlamaTokenType = iota
LLAMA_TOKEN_TYPE_NORMAL
LLAMA_TOKEN_TYPE_UNKNOWN
LLAMA_TOKEN_TYPE_CONTROL
LLAMA_TOKEN_TYPE_USER_DEFINED
LLAMA_TOKEN_TYPE_UNUSED
LLAMA_TOKEN_TYPE_BYTE
)
type LlamaVocab ¶
type LlamaVocab uintptr
type LlamaVocabType ¶
type LlamaVocabType int32
Enums
const (
LLAMA_VOCAB_TYPE_NONE LlamaVocabType = iota
LLAMA_VOCAB_TYPE_SPM
LLAMA_VOCAB_TYPE_BPE
LLAMA_VOCAB_TYPE_WPM
LLAMA_VOCAB_TYPE_UGM
LLAMA_VOCAB_TYPE_RWKV
)