import torch
import torch.nn as nn
import math
import matplotlib.pyplot as plt


class SoftTanhFunction(torch.autograd.Function):
    '''
    SoftTanh(x) = sign(x) * tanh(ln(1 + ln(1 + |x|)))
    f(x) ∈ (-1, 1)
    dy/dx ∈ (0, 1]
    '''
    @staticmethod
    def forward(ctx, x):
        abs_x = torch.abs(x)
        u = torch.log1p(abs_x)        # u = ln(1 + |x|)
        v = torch.log1p(u)            # v = ln(1 + u)
        tanh_v = torch.tanh(v)
        y = torch.sign(x) * tanh_v
        ctx.save_for_backward(abs_x)  # |x| is enough to rebuild the gradient
        return y

    @staticmethod
    def backward(ctx, grad_output):
        abs_x, = ctx.saved_tensors
        u = torch.log1p(abs_x)
        v = torch.log1p(u)
        tanh_v = torch.tanh(v)
        # dy/dx = sech^2(v) / ((1 + u) * (1 + |x|)); the two sign(x) factors cancel,
        # so the expression also holds at x = 0, where the gradient is exactly 1.
        sech_v_square = 1 - tanh_v.square()
        denominator = (1 + u) * (1 + abs_x)
        d_x = sech_v_square / denominator
        grad_x = grad_output * d_x
        return grad_x


def SoftTanh(x):
    return SoftTanhFunction.apply(x)
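

# Illustrative sketch, not part of the original file: torch.autograd.gradcheck compares
# the hand-written backward above against finite differences in double precision.
# The helper name and the test points are our own choices.
def _check_softtanh_gradient():
    x = torch.randn(32, dtype=torch.double, requires_grad=True)
    # gradcheck raises if the analytical and numerical Jacobians disagree.
    return torch.autograd.gradcheck(SoftTanhFunction.apply, (x,))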


class SoftSigmoidFunction(torch.autograd.Function):
    '''
    SoftSigmoid(x) = (sign(x) * tanh(ln(1 + ln(1 + |2x|))) + 1) / 2
    f(x) ∈ (0, 1)
    dy/dx ∈ (0, 1]
    '''
    @staticmethod
    def forward(ctx, x):
        abs_2x = torch.abs(2 * x)
        u = torch.log1p(abs_2x)
        v = torch.log1p(u)
        tanh_v = torch.tanh(v)
        y = (torch.sign(x) * tanh_v + 1) * 0.5
        ctx.save_for_backward(abs_2x)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        abs_2x, = ctx.saved_tensors
        u = torch.log1p(abs_2x)
        v = torch.log1p(u)
        tanh_v = torch.tanh(v)
        sech_v_square = 1 - tanh_v.square()
        # The factor 2 from d|2x|/dx cancels the 0.5 output scaling, so no extra
        # constant appears: dy/dx = sech^2(v) / ((1 + u) * (1 + |2x|)).
        denominator = (1 + u) * (1 + abs_2x)
        d_x = sech_v_square / denominator
        grad_x = grad_output * d_x
        return grad_x


def SoftSigmoid(x):
    return SoftSigmoidFunction.apply(x)
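

# Illustrative check, not part of the original file: by construction SoftSigmoid relates
# to SoftTanh exactly as sigmoid relates to tanh, i.e. SoftSigmoid(x) = (SoftTanh(2x) + 1) / 2,
# and the hand-written gradients should agree as well. The helper name is our own.
def _check_softsigmoid_identity():
    x = torch.randn(1000, dtype=torch.double, requires_grad=True)
    lhs = SoftSigmoid(x)
    rhs = (SoftTanh(2 * x) + 1) * 0.5
    forward_ok = torch.allclose(lhs, rhs)
    grad_lhs = torch.autograd.grad(lhs.sum(), x)[0]
    grad_rhs = torch.autograd.grad(rhs.sum(), x)[0]
    return forward_ok and torch.allclose(grad_lhs, grad_rhs)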


class AdaptiveSoftTanhFunction(torch.autograd.Function):
    '''
    AdaptiveSoftTanh(x) = alpha * sign(x) * tanh(ln(1 + ln(1 + |x|))) + beta
    f(x) ∈ (beta - |alpha|, beta + |alpha|)
    dy/dx ∈ (0, alpha] if alpha > 0
    dy/dx ∈ [alpha, 0) if alpha < 0
    '''
    @staticmethod
    def forward(ctx, x, alpha, beta):
        abs_x = torch.abs(x)
        u = torch.log1p(abs_x)
        v = torch.log1p(u)
        tanh_v = torch.tanh(v)
        y = torch.sign(x) * tanh_v * alpha + beta
        ctx.save_for_backward(x, alpha)
        return y

    @staticmethod
    def backward(ctx, grad_output):
        x, alpha = ctx.saved_tensors
        abs_x = torch.abs(x)
        u = torch.log1p(abs_x)
        v = torch.log1p(u)
        tanh_v = torch.tanh(v)
        sech_v_square = 1 - tanh_v.square()
        denominator = (1 + u) * (1 + abs_x)

        d_x = sech_v_square / denominator

        # dy/dx = alpha * sech^2(v) / ((1 + u) * (1 + |x|)); the alpha factor must be
        # included here to match the forward pass.
        grad_x = grad_output * d_x * alpha
        grad_alpha = grad_output * torch.sign(x) * tanh_v
        grad_beta = grad_output.clone()

        # alpha and beta are per-channel (last dimension), so their gradients are
        # reduced over every other dimension.
        sum_dims = [d for d in range(grad_output.dim()) if d != grad_output.dim() - 1]
        if sum_dims:
            grad_alpha = grad_alpha.sum(dim=sum_dims)
            grad_beta = grad_beta.sum(dim=sum_dims)

        return grad_x, grad_alpha, grad_beta


class AdaptiveSoftTanh(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.alpha = nn.Parameter(torch.empty(channels))
        self.beta = nn.Parameter(torch.zeros(channels))
        nn.init.normal_(self.alpha, mean=0, std=math.sqrt(2 / channels))

    def forward(self, x):
        return AdaptiveSoftTanhFunction.apply(x, self.alpha, self.beta)
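

# Illustrative usage sketch, not part of the original file: apply AdaptiveSoftTanh to a
# (batch, channels) tensor and compare its hand-written gradient with a reference built
# from standard autograd ops, which confirms the alpha scaling in backward(). The helper
# name and the shapes are our own assumptions.
def _check_adaptive_softtanh(batch=8, channels=4):
    torch.manual_seed(0)
    act = AdaptiveSoftTanh(channels).double()
    x = torch.randn(batch, channels, dtype=torch.double, requires_grad=True)

    y = act(x)
    y.sum().backward()

    # Reference: the same formula written with differentiable ops. The two gradients
    # agree for x != 0 (autograd's sign/abs give a 0 gradient exactly at 0).
    x_ref = x.detach().clone().requires_grad_(True)
    v_ref = torch.log1p(torch.log1p(x_ref.abs()))
    y_ref = act.alpha * torch.sign(x_ref) * torch.tanh(v_ref) + act.beta
    y_ref.sum().backward()

    return torch.allclose(y, y_ref) and torch.allclose(x.grad, x_ref.grad)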


if __name__ == "__main__":
    print("=== Testing the x = 0 case ===")

    # Input tensor containing 0
    x = torch.tensor([0.0], dtype=torch.float32, requires_grad=True)

    # Forward pass
    y = SoftTanh(x)
    print(f"Forward output (x=0): {y.item()}")

    # Backward pass
    y.backward()
    print(f"Gradient (x=0): {x.grad.item()}")

    # Verify the gradient value
    grad_ok = torch.allclose(x.grad, torch.tensor([1.0]))
    print(f"Gradient check: {'passed' if grad_ok else 'failed'}")

    x = torch.linspace(-10, 10, 10000, requires_grad=True)

    y_soft = SoftTanh(x)
    y_tanh = torch.tanh(x)

    grad_soft = torch.autograd.grad(y_soft, x, torch.ones_like(y_soft))[0]
    grad_tanh = torch.autograd.grad(y_tanh, x, torch.ones_like(y_tanh))[0]

    x_np = x.detach().numpy()
    y_soft_np = y_soft.detach().numpy()
    y_tanh_np = y_tanh.detach().numpy()
    grad_soft_np = grad_soft.detach().numpy()
    grad_tanh_np = grad_tanh.detach().numpy()

    plt.figure(figsize=(12, 10))
    # function graph
    plt.subplot(2, 1, 1)
    plt.plot(x_np, y_soft_np, 'b-', linewidth=2, label='SoftTanh')
    plt.plot(x_np, y_tanh_np, 'r--', linewidth=2, label='Tanh')
    plt.title('Function Comparison')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # grad graph
    plt.subplot(2, 1, 2)
    plt.plot(x_np, grad_soft_np, 'g-', linewidth=2, label='SoftTanh Gradient')
    plt.plot(x_np, grad_tanh_np, 'm--', linewidth=2, label='Tanh Gradient')
    plt.title('Gradient Comparison')
    plt.xlabel('x')
    plt.ylabel('dy/dx')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.savefig('./softtanh_comparison.png', dpi=300)
    plt.close()

    # Generate x values in log space, positive half-axis only
    x = torch.logspace(-2, 3, 100000, base=10, requires_grad=True)

    y_soft = SoftTanh(x)
    y_tanh = torch.tanh(x)

    grad_soft = torch.autograd.grad(y_soft, x, torch.ones_like(y_soft))[0]
    grad_tanh = torch.autograd.grad(y_tanh, x, torch.ones_like(y_tanh))[0]

    # Convert to numpy arrays for plotting
    x_np = x.detach().numpy()
    y_soft_np = y_soft.detach().numpy()
    y_tanh_np = y_tanh.detach().numpy()
    grad_soft_np = grad_soft.detach().numpy()
    grad_tanh_np = grad_tanh.detach().numpy()

    # Create figure and subplots
    plt.figure(figsize=(14, 12))

    # Function plot (log-log scale)
    plt.subplot(2, 1, 1)
    plt.loglog(x_np, 1 - y_soft_np, 'b-', linewidth=2, label='SoftTanh (1-y)')
    plt.loglog(x_np, 1 - y_tanh_np, 'r--', linewidth=2, label='Tanh (1-y)')
    plt.title('Function Asymptotic Behavior (Log-Log Scale)')
    plt.xlabel('x (log scale)')
    plt.ylabel('1 - y (log scale)')
    plt.grid(True, which="both", ls="--")
    plt.legend()

    # Gradient plot (log-log scale)
    plt.subplot(2, 1, 2)
    plt.loglog(x_np, grad_soft_np, 'g-', linewidth=2, label='SoftTanh Gradient')
    plt.loglog(x_np, grad_tanh_np, 'm--', linewidth=2, label='Tanh Gradient')
    plt.title('Gradient Decay (Log-Log Scale)')
    plt.xlabel('x (log scale)')
    plt.ylabel('dy/dx (log scale)')
    plt.grid(True, which="both", ls="--")
    plt.legend()

    plt.tight_layout()
    plt.savefig('./softtanh_log_comparison.png', dpi=300)
    plt.close()