@@ -20,6 +20,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
// Interpolation mode identifiers; the numeric values come from ggml.h
// (enum ggml_scale_mode, enum ggml_scale_flag).
#define NEAREST  0
#define BILINEAR 1
#define BICUBIC  2

// Specialization constant with constant_id 0; it is 0 unless overridden when
// the pipeline is created. Presumably compared against the identifiers above
// to pick the interpolation routine -- confirm against main().
layout (constant_id = 0) const uint scale_mode = 0;
2526
@@ -61,6 +62,39 @@ float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) {
6162 return fetch_bilinear(c0, c1, d, i12, i13);
6263}
6364
// Bicubic convolution kernel with a = -0.75, stored as cubic polynomial
// coefficients ordered to match powers(): (t^3, t^2, t, 1).
// https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
//
//   bcoeffs1:  (a+2) t^3 - (a+3) t^2        + 1     for     |t| <= 1
//   bcoeffs2:   a    t^3 -  5a   t^2 + 8a t - 4a    for 1 < |t| <  2
const vec4 bcoeffs1 = vec4( 1.25, -2.25,  0.0, 1.0);
const vec4 bcoeffs2 = vec4(-0.75,  3.75, -6.0, 3.0);

// Returns (x^3, x^2, x, 1), so that dot(coeffs, powers(x)) evaluates a cubic
// whose coefficients are listed from highest to lowest degree.
vec4 powers(float x) {
    const float x_sq = x * x;
    return vec4(x_sq * x, x_sq, x, 1);
}
70+
// One-dimensional cubic interpolation over four consecutive samples p0..p3.
// x is the position measured from p1 towards p2; at x = 0 the weights reduce
// to (0, 1, 0, 0), so the result is exactly p1. The caller in this file passes
// a fract() value, i.e. x in [0, 1).
float bicubic(float p0, float p1, float p2, float p3, float x) {
    // Each sample is weighted by the kernel polynomial evaluated at its
    // distance from the sampling position: x+1, x, 1-x and 2-x respectively.
    // The two outer samples use bcoeffs2, the two inner ones bcoeffs1.
    const float w0 = dot(bcoeffs2, powers(x + 1));
    const float w1 = dot(bcoeffs1, powers(x));
    const float w2 = dot(bcoeffs1, powers(1 - x));
    const float w3 = dot(bcoeffs2, powers(2 - x));

    return p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3;
}
77+
// Reads the data_a element at offset (dx, dy) from the integer position `i`,
// clamping each coordinate to [0, res] before scaling by the p.nb00 / p.nb01
// strides. This is a textual macro: `base`, `i` and `res` are not parameters
// and must be variables visible at the point of use.
#define FETCH(dx, dy) data_a[base + clamp(i.x + (dx), 0, res.x) * p.nb00 + clamp(i.y + (dy), 0, res.y) * p.nb01]
79+
// Computes the bicubic-interpolated value for destination index
// (i10, i11, i12, i13) from a 4x4 neighbourhood of data_a.
// Only the first two indices are interpolated; i12 and i13 are mapped to a
// source index by dividing by their scale factor.
float interpolate_bicubic(uint i10, uint i11, uint i12, uint i13) {
    // Offset of the source plane selected by the two outer indices.
    const uint i03 = uint(i13 / p.sf3);
    const uint i02 = uint(i12 / p.sf2);
    const uint base = p.a_offset + i03 * p.nb03 + i02 * p.nb02;

    // Upper clamp bound per axis, consumed by FETCH.
    const ivec2 res = ivec2(p.ne00 - 1, p.ne01 - 1);

    // Destination position mapped back into source coordinates, then split
    // into an integer cell and the fractional position inside it.
    const vec2 src_pos = (vec2(i10, i11) + p.pixel_offset) / vec2(p.sf0, p.sf1) - p.pixel_offset;
    const ivec2 i = ivec2(floor(src_pos));
    const vec2 frac = fract(src_pos);

    // FETCH reads `base`, `i` and `res` from this scope.
    // First interpolate along x within each of the four rows...
    const float row0 = bicubic(FETCH(-1, -1), FETCH(0, -1), FETCH(1, -1), FETCH(2, -1), frac.x);
    const float row1 = bicubic(FETCH(-1,  0), FETCH(0,  0), FETCH(1,  0), FETCH(2,  0), frac.x);
    const float row2 = bicubic(FETCH(-1,  1), FETCH(0,  1), FETCH(1,  1), FETCH(2,  1), frac.x);
    const float row3 = bicubic(FETCH(-1,  2), FETCH(0,  2), FETCH(1,  2), FETCH(2,  2), frac.x);

    // ...then interpolate the four row results along y.
    return bicubic(row0, row1, row2, row3, frac.y);
}
97+
6498void main() {
6599 const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
66100
@@ -81,6 +115,9 @@ void main() {
81115 case BILINEAR:
82116 result = interpolate_bilinear(i10, i11, i12, i13);
83117 break;
118+ case BICUBIC:
119+ result = interpolate_bicubic(i10, i11, i12, i13);
120+ break;
84121 }
85122
86123 data_d[p.d_offset + idx] = D_TYPE(result);
0 commit comments