src/http/url_path.zig


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

usingnamespace @import("../global.zig");

const PercentEncoding = @import("../query_string_map.zig").PercentEncoding;
const std = @import("std");
const allocators = @import("../allocators.zig");
const URLPath = @This();

extname: string = "",
path: string = "",
pathname: string = "",
first_segment: string = "",
query_string: string = "",
needs_redirect: bool = false,
const toMutable = allocators.constStrToU8;

// TODO: use a real URL parser
// this treats a URL like /_next/ identically to /
pub fn pathWithoutAssetPrefix(this: *const URLPath, asset_prefix: string) string {
    if (asset_prefix.len == 0) return this.path;
    const leading_slash_offset: usize = if (asset_prefix[0] == '/') 1 else 0;
    const base = this.path;
    const origin = asset_prefix[leading_slash_offset..];

    return if (base.len >= origin.len and strings.eql(base[0..origin.len], origin)) base[origin.len..] else base;
}

// optimization: very few long strings will be URL-encoded
// we're allocating virtual memory here, so if we never use it, it won't be allocated
// and even when they're, they're probably rarely going to be > 1024 chars long
// so we can have a big and little one and almost always use the little one
threadlocal var temp_path_buf: [1024]u8 = undefined;
threadlocal var big_temp_path_buf: [16384]u8 = undefined;

pub fn parse(possibly_encoded_pathname_: string) !URLPath {
    var decoded_pathname = possibly_encoded_pathname_;
    var needs_redirect = false;
    var invalid_uri = false;

    if (strings.indexOfChar(decoded_pathname, '%') != null) {
        var possibly_encoded_pathname = switch (decoded_pathname.len) {
            0...1024 => &temp_path_buf,
            else => &big_temp_path_buf,
        };
        possibly_encoded_pathname = possibly_encoded_pathname[0..std.math.min(
            possibly_encoded_pathname_.len,
            possibly_encoded_pathname.len,
        )];

        std.mem.copy(u8, possibly_encoded_pathname, possibly_encoded_pathname_[0..possibly_encoded_pathname.len]);
        var clone = possibly_encoded_pathname[0..possibly_encoded_pathname.len];

        var fbs = std.io.fixedBufferStream(
            // This is safe because:
            // - this comes from a non-const buffer
            // - percent *decoding* will always be <= length of the original string (no buffer overflow)
            toMutable(
                possibly_encoded_pathname,
            ),
        );
        var writer = fbs.writer();

        decoded_pathname = possibly_encoded_pathname[0..try PercentEncoding.decodeFaultTolerant(@TypeOf(writer), writer, clone, &needs_redirect, true)];
    }

    var question_mark_i: i16 = -1;
    var period_i: i16 = -1;
    var first_segment_end: i16 = std.math.maxInt(i16);
    var last_slash: i16 = -1;

    var i: i16 = @intCast(i16, decoded_pathname.len) - 1;

    while (i >= 0) : (i -= 1) {
        const c = decoded_pathname[@intCast(usize, i)];

        switch (c) {
            '?' => {
                question_mark_i = std.math.max(question_mark_i, i);
                if (question_mark_i < period_i) {
                    period_i = -1;
                }

                if (last_slash > question_mark_i) {
                    last_slash = -1;
                }
            },
            '.' => {
                period_i = std.math.max(period_i, i);
            },
            '/' => {
                last_slash = std.math.max(last_slash, i);

                if (i > 0) {
                    first_segment_end = std.math.min(first_segment_end, i);
                }
            },
            else => {},
        }
    }

    if (last_slash > period_i) {
        period_i = -1;
    }

    const extname = brk: {
        if (question_mark_i > -1 and period_i > -1) {
            period_i += 1;
            break :brk decoded_pathname[@intCast(usize, period_i)..@intCast(usize, question_mark_i)];
        } else if (period_i > -1) {
            period_i += 1;
            break :brk decoded_pathname[@intCast(usize, period_i)..];
        } else {
            break :brk &([_]u8{});
        }
    };

    const path = if (question_mark_i < 0) decoded_pathname[1..] else decoded_pathname[1..@intCast(usize, question_mark_i)];

    const first_segment = decoded_pathname[1..std.math.min(@intCast(usize, first_segment_end), decoded_pathname.len)];

    return URLPath{
        .extname = extname,
        .pathname = decoded_pathname,
        .first_segment = first_segment,
        .path = if (decoded_pathname.len == 1) "." else path,
        .query_string = if (question_mark_i > -1) decoded_pathname[@intCast(usize, question_mark_i)..@intCast(usize, decoded_pathname.len)] else "",
        .needs_redirect = needs_redirect,
    };
}