/* Source: packages/bun-uws/src/Multipart.h (blob 8538d642e29e9f13ee27c945258749325630ee4c) */
/*
 * Authored by Alex Hultman, 2018-2020.
 * Intellectual property of third-party.

 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at

 *     http://www.apache.org/licenses/LICENSE-2.0

 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Implements the multipart protocol. Builds atop parts of our common http parser (not yet refactored that way). */
/* https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html */

#ifndef UWS_MULTIPART_H
#define UWS_MULTIPART_H

#include "MessageParser.h"

#include <string_view>
#include <optional>
#include <cstring>
#include <utility>
#include <cctype>

namespace uWS {

    /* This one could possibly be shared with ExtensionsParser to some degree */
    struct ParameterParser {

        /* Takes the line to tokenize, commonly the value of a content-disposition header in the multipart.
         * Only a view is stored, so the characters of the line must outlive the parser. */
        ParameterParser(std::string_view line) {
            remainingLine = line;
        }

        /* Returns next key/value where value can simply be empty.
         * If key (first) is empty then we are at the end.
         * Every returned view points into the line given at construction. */
        std::pair<std::string_view, std::string_view> getKeyValue() {
            std::string_view key = getToken();
            std::string_view op = getToken();

            /* "key" at the very end, or "key;" - either way there is no value */
            if (op.empty() || op[0] == ';') {
                return {key, ""};
            }

            /* Anything else was consumed as the operator, so the token after it is the value */
            std::string_view value = getToken();
            /* Strip ; or if at end, nothing */
            getToken();
            return {key, value};
        }

    private:
        /* What is left of the line; shrinks from the front as tokens are consumed */
        std::string_view remainingLine;

        /* The <cctype> classifiers are only defined for EOF and values representable as unsigned char.
         * Header bytes are untrusted and may be >= 0x80 (negative where char is signed), so convert first. */
        static bool isSpace(char c) {
            return std::isspace(static_cast<unsigned char>(c)) != 0;
        }

        /* Consumes a token from the line. Will "unquote" strings.
         * A token is one of: a single ; or = operator, the contents of a "quoted string",
         * or a run of characters up to the next ; = whitespace or end of line.
         * Returns an empty view when nothing is left or when a quoted string is never closed. */
        std::string_view getToken() {
            /* Strip leading whitespace */
            while (!remainingLine.empty() && isSpace(remainingLine.front())) {
                remainingLine.remove_prefix(1);
            }

            if (remainingLine.empty()) {
                /* All we had was space */
                return {};
            }

            const char head = remainingLine.front();

            /* Operators are always exactly one character long */
            if (head == ';' || head == '=') {
                std::string_view op = remainingLine.substr(0, 1);
                remainingLine.remove_prefix(1);
                return op;
            }

            /* Quoted string: the token is whatever lies between the two double quotes */
            if (head == '\"') {
                remainingLine.remove_prefix(1);

                const std::string_view::size_type closingQuote = remainingLine.find('\"');
                if (closingQuote == std::string_view::npos) {
                    /* Never closed: the rest of the line is consumed and no token is produced */
                    remainingLine.remove_prefix(remainingLine.length());
                    return {};
                }

                std::string_view quoted = remainingLine.substr(0, closingQuote);
                /* Skip the contents plus the closing quote */
                remainingLine.remove_prefix(closingQuote + 1);
                return quoted;
            }

            /* Plain token: read anything until ; = space or end */
            std::string_view::size_type tokenLength = 0;
            while (tokenLength < remainingLine.length()) {
                const char c = remainingLine[tokenLength];
                if (c == ';' || c == '=' || isSpace(c)) {
                    break;
                }
                ++tokenLength;
            }

            std::string_view token = remainingLine.substr(0, tokenLength);
            remainingLine.remove_prefix(tokenLength);
            return token;
        }
    };

    struct MultipartParser {

        /* Backing storage for the delimiter: 2 hyphens followed by 1 - 70 chars of boundary */
        char prependedBoundaryBuffer[72];
        /* The delimiter actually in use, a view into prependedBoundaryBuffer. Stays empty if no usable
         * boundary was found. NOTE(review): no copy operations are defined here, so a copied parser's
         * view still refers to the buffer of the object it was copied from. */
        std::string_view prependedBoundary;
        /* The part of the body that has not been consumed yet */
        std::string_view remainingBody;
        /* True until the opening delimiter has been skipped */
        bool first = true;

        /* I think it is more than sane to limit this to 10 per part */
        //static const int MAX_HEADERS = 10;

        /* Builds the parser from a content-type value by extracting the boundary from it.
         * Expected form is "multipart/something;somethingboundary=something". On anything else,
         * the parser is left invalid (see isValid). */
        MultipartParser(std::string_view contentType) {

            /* Must begin with "multipart/" (10 chars) */
            if (contentType.rfind("multipart/", 0) != 0) {
                return;
            }

            /* For now the boundary is simply guessed to be everything after the first = following
             * the prefix. This is not entirely standards compliant as boundary may be expressed
             * with or without " and spaces */
            const auto equalsAt = contentType.find('=', 10);
            if (equalsAt == std::string_view::npos) {
                return;
            }

            /* Boundary must be 1 to 70 chars long */
            const std::string_view boundary = contentType.substr(equalsAt + 1);
            const auto boundaryLength = boundary.length();
            if (boundaryLength == 0 || boundaryLength > 70) {
                /* Invalid size */
                return;
            }

            /* Store it behind two hyphens */
            prependedBoundaryBuffer[0] = '-';
            prependedBoundaryBuffer[1] = '-';
            memcpy(prependedBoundaryBuffer + 2, boundary.data(), boundaryLength);

            prependedBoundary = std::string_view(prependedBoundaryBuffer, boundaryLength + 2);
        }

        /* Did the constructor manage to read a boundary? */
        bool isValid() {
            return !prependedBoundary.empty();
        }

        /* Give the body to parse. Do this once, before getting any parts.
         * Only a view is kept, and getNextPart writes into the viewed memory. */
        void setBody(std::string_view body) {
            remainingBody = body;
        }

        /* Extracts the data of the next part and hands the part's header section to getHeaders,
         * which receives the headers pointer. Returns nullopt on end or error. */
        std::optional<std::string_view> getNextPart(std::pair<std::string_view, std::string_view> *headers) {

            const auto delimiterLength = prependedBoundary.length();

            /* Less left than one delimiter: we are done now */
            if (remainingBody.length() < delimiterLength) {
                return std::nullopt;
            }

            /* On the first call, toss away the opening delimiter and anything before it */
            if (first) {
                const auto openingAt = remainingBody.find(prependedBoundary);
                if (openingAt == std::string_view::npos) {
                    /* Cannot parse */
                    return std::nullopt;
                }

                remainingBody.remove_prefix(openingAt + delimiterLength);
                first = false;
            }

            /* The part runs up to the next delimiter */
            const auto closingAt = remainingBody.find(prependedBoundary);
            if (closingAt == std::string_view::npos) {
                /* Cannot parse (or simply done) */
                return std::nullopt;
            }

            std::string_view part = remainingBody.substr(0, closingAt);
            remainingBody.remove_prefix(closingAt + delimiterLength);

            /* Two bytes are dropped at each end of the part (the rn before and the rn after it) */
            if (part.length() < 4) {
                /* Cannot strip */
                return std::nullopt;
            }
            part = part.substr(2, part.length() - 4);

            char *partBegin = const_cast<char *>(part.data());
            char *partEnd = partBegin + part.length();

            /* Post pad with \r right after the part. That byte is one of the two just stripped, so it
             * still lies before the delimiter. This overwrites the body, which makes parsing a second
             * pass invalid, so you can only iterate over parts once */
            *partEnd = '\r';

            /* For this to be a valid part, at least 4 bytes (\r\n\r\n) need to be consumed */
            int consumed = getHeaders(partBegin, partEnd, headers);
            if (!consumed) {
                /* This is an invalid part */
                return std::nullopt;
            }

            /* What follows the headers is the part's data */
            part.remove_prefix(consumed);
            return part;
        }
    };

}

#endif