About

The d9d.peft.lora package implements Low-Rank Adaptation (LoRA). It works by wrapping existing linear layers (both standard nn.Linear and d9d's GroupedLinear) in a container module that holds the original frozen layer (base) and two trainable low-rank matrices (lora_A and lora_B).

Because the original layer is moved into the base submodule, the state dict keys change (for example, q_proj.weight becomes q_proj.base.weight). The LoRA method automatically generates ModelStateMapperRename objects to handle this transparently during checkpoint loading.
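
For illustration, a minimal sketch of how the keys change for a single wrapped layer (the layer sizes are arbitrary, and LoRALinear is assumed to be importable from d9d.peft.lora):

from torch import nn
from d9d.peft.lora import LoRALinear, LoRAParameters

layer = nn.Linear(64, 64, bias=False)
print(sorted(layer.state_dict()))    # ['weight']

wrapped = LoRALinear(layer, LoRAParameters(r=8, alpha=16, dropout=0.0))
print(sorted(wrapped.state_dict()))  # ['base.weight', 'lora_A.weight', 'lora_B.weight']
# A checkpoint saved before injection only has 'weight'; the generated mapper
# redirects it to 'base.weight' on load.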

Usage Example

import torch
import re
from d9d.peft import inject_peft_and_freeze, merge_peft
from d9d.peft.lora import LoRA, LoRAConfig, LoRAParameters

# 1. Configuration
config = LoRAConfig(
    module_name_pattern=re.compile(r".*attention\.q_proj.*"),  # Target Attention Q projections
    params=LoRAParameters(
        r=16,
        alpha=32,
        dropout=0.1
    )
)

# 2. Instantiate Method
method = LoRA(config)

# 3. Inject
# This replaces matching nn.Linear layers with LoRALinear wrappers in-place
# and returns a mapper that routes 'q_proj.weight' -> 'q_proj.base.weight'.
# 'model' is your nn.Module containing the targeted projections.
mapper = inject_peft_and_freeze(method, model)

# ... pass 'mapper' object to d9d's Trainer or manually load a model checkpoint ...

# ... train a model ...

# 4. Merge LoRA weights back into the base layers (e.g. before exporting the model)
merge_peft(method, model)
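
After merge_peft, the wrappers are replaced with the merged base layers, so the exported state dict uses the original key names (e.g. q_proj.weight) and loads into the unmodified model architecture.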

d9d.peft.lora

Package implementing Low-Rank Adaptation (LoRA).

LoRA

Bases: PeftMethod[LoRAConfig]

Implements the Low-Rank Adaptation (LoRA) injection strategy.

It scans the module structure for nn.Linear or GroupedLinear layers matching the configured name pattern. Matched layers are replaced with LoRA wrappers.

It also generates ModelStateMapperRename objects. Since the original weight layer.weight is now at layer.base.weight inside the wrapper, the mapper ensures that loading a standard checkpoint still works by redirecting the key.
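
A lower-level sketch of driving the method directly, instead of via the inject_peft_and_freeze helper ('model' and the optimizer choice are illustrative):

import re
import torch
from d9d.peft.lora import LoRA, LoRAConfig, LoRAParameters

method = LoRA(LoRAConfig(
    module_name_pattern=re.compile(r".*attention\.q_proj"),
    params=LoRAParameters(r=16, alpha=32, dropout=0.1),
))
result = method.inject(model)  # 'model': your nn.Module with matching Linear layers
optimizer = torch.optim.AdamW(result.parameters_to_train, lr=1e-4)
# result.load_state_mappers contains the 'weight' -> 'base.weight' renames
# needed when loading a pre-injection checkpoint.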

Source code in d9d/peft/lora/method.py
class LoRA(PeftMethod[LoRAConfig]):
    """
    Implements the Low-Rank Adaptation (LoRA) injection strategy.

    It scans the module structure for `nn.Linear` or `GroupedLinear` layers matching
    the configured name pattern. Matched layers are replaced with LoRA wrappers.

    It also generates `ModelStateMapperRename` objects. Since the original weight
    `layer.weight` is now at `layer.base.weight` inside the wrapper, the mapper
    ensures that loading a standard checkpoint still works by redirecting the key.
    """

    def __init__(self, config: LoRAConfig):
        """
         Constructs a LoRA method.

         Args:
             config: LoRA configuration containing patterns and hyperparameters.
         """

        self._config = config

    def inject(self, module: nn.Module) -> PeftInjectionResult:
        params_to_train: list[nn.Parameter] = []
        state_mappers: list[ModelStateMapper] = []

        for mod_name, mod in named_modules_without_lora(module):
            if not isinstance(mod, _CAN_APPLY_MODULES):
                continue

            if not self._config.module_name_pattern.fullmatch(mod_name):
                continue

            lora_mod: LoRALinear | LoRAGroupedLinear
            if isinstance(mod, nn.Linear):
                lora_mod = LoRALinear(mod, self._config.params)
            elif isinstance(mod, GroupedLinear):
                lora_mod = LoRAGroupedLinear(mod, self._config.params)
            else:
                raise ValueError(f"Unknown layer {type(mod)} for LoRA")

            params_to_train.extend(lora_mod.lora_A.parameters())
            params_to_train.extend(lora_mod.lora_B.parameters())

            state_mappers.append(ModelStateMapperRename(
                name_from=f"{mod_name}.weight",
                name_to=f"{mod_name}.base.weight"
            ))

            module.set_submodule(mod_name, lora_mod)

        return PeftInjectionResult(
            parameters_to_train=params_to_train,
            load_state_mappers=state_mappers
        )

    def merge(self, module: nn.Module):
        for mod_name, mod in module.named_modules():
            if not isinstance(mod, _LORA_MODULES):
                continue

            if not self._config.module_name_pattern.fullmatch(mod_name):
                continue

            with torch.no_grad():
                orig_mod = mod.merge_with_base_()

            module.set_submodule(mod_name, orig_mod)

    @classmethod
    def from_config(cls, config: LoRAConfig) -> Self:
        return cls(config)

__init__(config)

Constructs a LoRA method.

Parameters:

    config (LoRAConfig): LoRA configuration containing patterns and hyperparameters. Required.

Source code in d9d/peft/lora/method.py
def __init__(self, config: LoRAConfig):
    """
     Constructs a LoRA method.

     Args:
         config: LoRA configuration containing patterns and hyperparameters.
     """

    self._config = config

LoRAConfig

Bases: BaseModel

Configuration for LoRA application.

Attributes:

    kind (Literal["lora"]): Discriminator field, always "lora".
    module_name_pattern (Pattern): Regular expression matching module names to wrap with LoRA.
    params (LoRAParameters): Hyperparameters for the LoRA layers.
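
Note that the method applies module_name_pattern with fullmatch against the dotted module name, so the expression must cover the entire name. A quick sketch with hypothetical module names:

import re

pattern = re.compile(r".*attention\.q_proj")
print(bool(pattern.fullmatch("layers.0.attention.q_proj")))   # True
print(bool(pattern.fullmatch("attention.q_proj.extra")))      # False - trailing part not covered
print(bool(re.compile(r"attention\.q_proj").fullmatch("layers.0.attention.q_proj")))  # False - prefix not covered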

Source code in d9d/peft/lora/config.py
class LoRAConfig(BaseModel):
    """
    Configuration for LoRA application.

    Attributes:
        kind: Discriminator field, always "lora".
        module_name_pattern: Regular expression matching module names to wrap with LoRA.
        params: Hyperparameters for the LoRA layers.
    """

    kind: Literal["lora"] = "lora"

    module_name_pattern: Pattern
    params: LoRAParameters

LoRAGroupedLinear

Bases: Module

A LoRA wrapper around a GroupedLinear layer (commonly used in MoE or grouped query attention).

Attributes:

    lora_A: The A matrix (grouped linear).
    lora_B: The B matrix (grouped linear).
    base: The original base GroupedLinear layer.
    dropout: Scaling dropout layer.

Source code in d9d/peft/lora/layer.py
class LoRAGroupedLinear(nn.Module):
    """
    A LoRA wrapper around a GroupedLinear layer (commonly used in MoE or grouped query attention).

    Attributes:
        lora_A: The A matrix (grouped linear).
        lora_B: The B matrix (grouped linear).
        base: The original base GroupedLinear layer.
        dropout: Scaling dropout layer.
    """

    def __init__(
            self,
            base_layer: GroupedLinear,
            params: LoRAParameters
    ):
        """
        Constructs a LoRAGroupedLinear layer.

        Args:
            base_layer: The original GroupedLinear layer to wrap.
            params: LoRA hyperparameters.
        """

        super().__init__()
        self.lora_A = GroupedLinear(
            base_layer.n_groups, base_layer.in_features, params.r,
            device=base_layer.weight.device,
            dtype=base_layer.weight.dtype
        )
        self.lora_B = GroupedLinear(
            base_layer.n_groups,
            params.r,
            base_layer.out_features,
            device=base_layer.weight.device,
            dtype=base_layer.weight.dtype
        )
        self.base = base_layer

        self.dropout = nn.Dropout(params.dropout)

        self._scale = params.alpha / params.r

        self.reset_parameters()

    def forward(self, x: torch.Tensor, x_groups: torch.Tensor) -> torch.Tensor:
        """
        Computes forward pass for grouped inputs.

        Args:
            x: Input tensor.
            x_groups: A tensor indicating group indices for each input.

        Returns:
            Combined output of base and LoRA path.
        """

        base_x = self.base(x, x_groups)
        adapt_x = self._scale * self.lora_B(self.lora_A(self.dropout(x), x_groups), x_groups)
        return base_x + adapt_x

    @torch.no_grad()
    def merge_with_base_(self) -> GroupedLinear:
        """
        Collapse the LoRA weights into the base GroupedLinear layer.

        Returns:
            The modified GroupedLinear layer.
        """

        mod = self.base
        mod.weight.data += (torch.bmm(self.lora_A.weight.data, self.lora_B.weight.data)) * self._scale
        return mod

    def reset_parameters(self):
        """
        Resets LoRA parameters. A is random, B is zeroed.
        """

        self.lora_A.reset_parameters()
        nn.init.zeros_(self.lora_B.weight)

__init__(base_layer, params)

Constructs a LoRAGroupedLinear layer.

Parameters:

    base_layer (GroupedLinear): The original GroupedLinear layer to wrap. Required.
    params (LoRAParameters): LoRA hyperparameters. Required.

Source code in d9d/peft/lora/layer.py
def __init__(
        self,
        base_layer: GroupedLinear,
        params: LoRAParameters
):
    """
    Constructs a LoRAGroupedLinear layer.

    Args:
        base_layer: The original GroupedLinear layer to wrap.
        params: LoRA hyperparameters.
    """

    super().__init__()
    self.lora_A = GroupedLinear(
        base_layer.n_groups, base_layer.in_features, params.r,
        device=base_layer.weight.device,
        dtype=base_layer.weight.dtype
    )
    self.lora_B = GroupedLinear(
        base_layer.n_groups,
        params.r,
        base_layer.out_features,
        device=base_layer.weight.device,
        dtype=base_layer.weight.dtype
    )
    self.base = base_layer

    self.dropout = nn.Dropout(params.dropout)

    self._scale = params.alpha / params.r

    self.reset_parameters()

forward(x, x_groups)

Computes forward pass for grouped inputs.

Parameters:

    x (Tensor): Input tensor.
    x_groups (Tensor): A tensor indicating group indices for each input.

Returns:

    Tensor: Combined output of base and LoRA path.

Source code in d9d/peft/lora/layer.py
def forward(self, x: torch.Tensor, x_groups: torch.Tensor) -> torch.Tensor:
    """
    Computes forward pass for grouped inputs.

    Args:
        x: Input tensor.
        x_groups: A tensor indicating group indices for each input.

    Returns:
        Combined output of base and LoRA path.
    """

    base_x = self.base(x, x_groups)
    adapt_x = self._scale * self.lora_B(self.lora_A(self.dropout(x), x_groups), x_groups)
    return base_x + adapt_x

merge_with_base_()

Collapse the LoRA weights into the base GroupedLinear layer.

Returns:

    GroupedLinear: The modified GroupedLinear layer.

Source code in d9d/peft/lora/layer.py
@torch.no_grad()
def merge_with_base_(self) -> GroupedLinear:
    """
    Collapse the LoRA weights into the base GroupedLinear layer.

    Returns:
        The modified GroupedLinear layer.
    """

    mod = self.base
    mod.weight.data += (torch.bmm(self.lora_A.weight.data, self.lora_B.weight.data)) * self._scale
    return mod

reset_parameters()

Resets LoRA parameters. A is random, B is zeroed.

Source code in d9d/peft/lora/layer.py
def reset_parameters(self):
    """
    Resets LoRA parameters. A is random, B is zeroed.
    """

    self.lora_A.reset_parameters()
    nn.init.zeros_(self.lora_B.weight)

LoRALinear

Bases: Module

A LoRA wrapper around a standard PyTorch Linear layer.

Wraps a base linear layer and adds low-rank adaptation matrices A and B.

Attributes:

    lora_A: The A matrix (in_features -> r).
    lora_B: The B matrix (r -> out_features).
    base: The original base Linear layer.
    dropout (Dropout): Scaling dropout layer.
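
For a sense of scale, a minimal sketch of the trainable parameter count added by the wrapper (the layer sizes are illustrative, and LoRALinear is assumed to be importable from d9d.peft.lora):

from torch import nn
from d9d.peft.lora import LoRALinear, LoRAParameters

base = nn.Linear(4096, 4096, bias=False)   # ~16.8M frozen weights
lora = LoRALinear(base, LoRAParameters(r=16, alpha=32, dropout=0.0))
trainable = sum(p.numel() for p in lora.lora_A.parameters())
trainable += sum(p.numel() for p in lora.lora_B.parameters())
print(trainable)  # 131072 -- two 4096x16 matrices, under 1% of the base layer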

Source code in d9d/peft/lora/layer.py
class LoRALinear(nn.Module):
    """
    A LoRA wrapper around a standard PyTorch Linear layer.

    Wraps a base linear layer and adds low-rank adaptation matrices A and B.

    Attributes:
        lora_A: The A matrix (in_features -> r).
        lora_B: The B matrix (r -> out_features).
        base: The original base Linear layer.
        dropout: Scaling dropout layer.
    """

    def __init__(
            self,
            base_layer: nn.Linear,
            params: LoRAParameters
    ):
        """
        Constructs a LoRALinear layer.

        Args:
            base_layer: The original Linear layer to wrap.
            params: LoRA hyperparameters (r, alpha, dropout).

        Raises:
            ValueError: If the base layer has a bias (currently unsupported).
        """

        super().__init__()
        self.lora_A = nn.Linear(
            base_layer.in_features, params.r, bias=False,
            device=base_layer.weight.device,
            dtype=base_layer.weight.dtype
        )
        self.lora_B = nn.Linear(
            params.r, base_layer.out_features, bias=False,
            device=base_layer.weight.device,
            dtype=base_layer.weight.dtype
        )
        self.base = base_layer

        if base_layer.bias is not None:
            raise ValueError("LoRA is unsupported with biased linear layers")

        self.dropout: nn.Dropout = nn.Dropout(params.dropout)

        self._scale: float = params.alpha / params.r

        self.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Takes input tensor, computes base output and LoRA adaptation, and returns the sum.

        Args:
            x: Input tensor.

        Returns:
            The output of base(x) + scale * (B @ A @ dropout(x)).
        """

        base_x = self.base(x)
        adapt_x = self._scale * self.lora_B(self.lora_A(self.dropout(x)))
        return base_x + adapt_x

    @torch.no_grad()
    def merge_with_base_(self) -> nn.Linear:
        """
        Collapse the LoRA weights into the base linear layer.

        Returns:
            The modified base linear layer with updated weights.
        """

        mod = self.base
        mod.weight.data += (self.lora_B.weight.data @ self.lora_A.weight.data) * self._scale
        return mod

    def reset_parameters(self):
        """
        Resets LoRA parameters. A is random, B is zeroed.
        """

        self.lora_A.reset_parameters()
        nn.init.zeros_(self.lora_B.weight)

__init__(base_layer, params)

Constructs a LoRALinear layer.

Parameters:

    base_layer (Linear): The original Linear layer to wrap. Required.
    params (LoRAParameters): LoRA hyperparameters (r, alpha, dropout). Required.

Raises:

    ValueError: If the base layer has a bias (currently unsupported).

Source code in d9d/peft/lora/layer.py
def __init__(
        self,
        base_layer: nn.Linear,
        params: LoRAParameters
):
    """
    Constructs a LoRALinear layer.

    Args:
        base_layer: The original Linear layer to wrap.
        params: LoRA hyperparameters (r, alpha, dropout).

    Raises:
        ValueError: If the base layer has a bias (currently unsupported).
    """

    super().__init__()
    self.lora_A = nn.Linear(
        base_layer.in_features, params.r, bias=False,
        device=base_layer.weight.device,
        dtype=base_layer.weight.dtype
    )
    self.lora_B = nn.Linear(
        params.r, base_layer.out_features, bias=False,
        device=base_layer.weight.device,
        dtype=base_layer.weight.dtype
    )
    self.base = base_layer

    if base_layer.bias is not None:
        raise ValueError("LoRA is unsupported with biased linear layers")

    self.dropout: nn.Dropout = nn.Dropout(params.dropout)

    self._scale: float = params.alpha / params.r

    self.reset_parameters()

forward(x)

Takes input tensor, computes base output and LoRA adaptation, and returns the sum.

Parameters:

    x (Tensor): Input tensor.

Returns:

    Tensor: The output of base(x) + scale * (B @ A @ dropout(x)).

Source code in d9d/peft/lora/layer.py
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """
    Takes input tensor, computes base output and LoRA adaptation, and returns the sum.

    Args:
        x: Input tensor.

    Returns:
        The output of base(x) + scale * (B @ A @ dropout(x)).
    """

    base_x = self.base(x)
    adapt_x = self._scale * self.lora_B(self.lora_A(self.dropout(x)))
    return base_x + adapt_x

merge_with_base_()

Collapse the LoRA weights into the base linear layer.

Returns:

    Linear: The modified base linear layer with updated weights.
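
The merge folds the adapter into the base weight, W <- W + (alpha / r) * (W_B @ W_A), so in eval mode (dropout inactive) the merged layer reproduces base(x) + scale * B(A(x)) exactly. A minimal check, assuming LoRALinear is importable from d9d.peft.lora:

import torch
from torch import nn
from d9d.peft.lora import LoRALinear, LoRAParameters

lin = nn.Linear(16, 16, bias=False)
wrapped = LoRALinear(lin, LoRAParameters(r=4, alpha=8, dropout=0.0)).eval()
nn.init.normal_(wrapped.lora_B.weight)   # B is zero-initialised; make the adapter non-trivial

x = torch.randn(2, 16)
before = wrapped(x)
merged = wrapped.merge_with_base_()      # returns the (now updated) base nn.Linear
after = merged(x)
print(torch.allclose(before, after, atol=1e-5))  # True, up to float error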

Source code in d9d/peft/lora/layer.py
@torch.no_grad()
def merge_with_base_(self) -> nn.Linear:
    """
    Collapse the LoRA weights into the base linear layer.

    Returns:
        The modified base linear layer with updated weights.
    """

    mod = self.base
    mod.weight.data += (self.lora_B.weight.data @ self.lora_A.weight.data) * self._scale
    return mod

reset_parameters()

Resets LoRA parameters. A is random, B is zeroed.

Source code in d9d/peft/lora/layer.py
def reset_parameters(self):
    """
    Resets LoRA parameters. A is random, B is zeroed.
    """

    self.lora_A.reset_parameters()
    nn.init.zeros_(self.lora_B.weight)

LoRAParameters

Bases: BaseModel

Hyperparameters for LoRA layers.

Attributes:

    r (int): Rank of the low-rank adaptation matrices.
    alpha (int): Scaling factor for the learned weights.
    dropout (float): Dropout probability for the input to LoRA layers.
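
The LoRA layers scale the adapter output by alpha / r (the _scale factor in the layer constructors); a small sketch of the resulting value:

from d9d.peft.lora import LoRAParameters

params = LoRAParameters(r=16, alpha=32, dropout=0.1)
print(params.alpha / params.r)  # 2.0 -- the factor applied to the B(A(x)) path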

Source code in d9d/peft/lora/config.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
class LoRAParameters(BaseModel):
    """
    Hyperparameters for LoRA layers.

    Attributes:
        r: Rank of the low-rank adaptation matrices.
        alpha: Scaling factor for the learned weights.
        dropout: Dropout probability for the input to LoRA layers.
    """

    r: int
    alpha: int
    dropout: float