db-lab1/bserv/utils.hpp

#ifndef _UTILS_HPP
#define _UTILS_HPP

#include <cstddef>
#include <string>
#include <tuple>
#include <vector>
#include <map>
#include <random>
#include <mutex>
#include <sstream>
#include <iomanip>

#include <cryptopp/cryptlib.h>
#include <cryptopp/pwdbased.h>
#include <cryptopp/sha.h>
#include <cryptopp/base64.h>

namespace bserv::utils {

namespace internal {

// NOTE:
// - `random_device` is implementation dependent.
//   it doesn't work with GNU GCC on Windows.
// - for thread-safety, do not directly use it.
//   use `get_rd_value` instead.
// shared entropy source and the mutex that serializes access to it.
inline std::random_device rd;
inline std::mutex rd_mutex;

// returns one value drawn from `rd` while holding `rd_mutex`,
// so concurrent callers never invoke `rd` at the same time.
inline auto get_rd_value() {
    const std::lock_guard<std::mutex> guard{rd_mutex};
    const auto value = rd();
    return value;
}

// const std::string chars = "abcdefghijklmnopqrstuvwxyz"
//                           "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
//                           "1234567890"
//                           "!@#$%^&*()"
//                           "`~-_=+[{]}\\|;:'\",<.>/? ";

// alphabet used by `generate_random_string`: lowercase letters,
// uppercase letters and digits (62 characters).
// declared `inline` so that every translation unit including this header
// shares ONE object; a plain namespace-scope `const` has internal linkage,
// which makes the inline functions that use it refer to a different
// object in each translation unit (an ODR violation).
inline const std::string chars = "abcdefghijklmnopqrstuvwxyz"
                                 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                 "1234567890";


// characters that `encode_url` emits verbatim: ALPHA / DIGIT / "-" / "." /
// "_" / "~" (the RFC 3986 "unreserved" set).
// declared `inline` so that every translation unit including this header
// shares ONE object; a plain namespace-scope `const` has internal linkage,
// which makes the inline functions that use it refer to a different
// object in each translation unit (an ODR violation).
inline const std::string url_safe_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                               "abcdefghijklmnopqrstuvwxyz"
                                               "0123456789-._~";

}  // internal

// https://www.boost.org/doc/libs/1_75_0/libs/random/example/password.cpp
// builds a string of `len` characters, each drawn uniformly from
// `internal::chars`.
// a fresh `std::mt19937` is seeded with a single value obtained from
// `internal::get_rd_value()` on every call.
inline std::string generate_random_string(std::size_t len) {
    std::mt19937 engine{internal::get_rd_value()};
    // inclusive upper bound: index of the last character of the alphabet.
    const int last_index = static_cast<int>(internal::chars.length()) - 1;
    std::uniform_int_distribution<> pick{0, last_index};

    std::string generated;
    std::size_t remaining = len;
    while (remaining > 0) {
        generated.push_back(internal::chars[pick(engine)]);
        --remaining;
    }
    return generated;
}

namespace security {

// https://codahale.com/a-lesson-in-timing-attacks/
// compares `a` and `b` for equality without stopping at the first
// differing character: every position is visited and the differences are
// OR-ed together, so the loop length does not depend on where they differ.
// strings of different lengths are rejected immediately.
// returns true iff both strings have the same length and content.
inline bool constant_time_compare(const std::string& a, const std::string& b) {
    const std::size_t length = a.length();
    if (length != b.length()) {
        return false;
    }
    int accumulated_diff = 0;
    auto rhs = b.begin();
    for (const char lhs : a) {
        // XOR is non-zero exactly when the two characters differ.
        accumulated_diff |= lhs ^ *rhs;
        ++rhs;
    }
    return !accumulated_diff;
}

// https://cryptopp.com/wiki/PKCS5_PBKDF2_HMAC
inline std::string hash_password(
    const std::string& password,
    const std::string& salt,
    unsigned int iterations = 20000 /*320000*/) {
    using namespace CryptoPP;
    byte derived[SHA256::DIGESTSIZE];
    PKCS5_PBKDF2_HMAC<SHA256> pbkdf;
	byte unused = 0;
    pbkdf.DeriveKey(derived, sizeof(derived), unused,
        (const byte*) password.c_str(), password.length(),
        (const byte*) salt.c_str(), salt.length(),
        iterations, 0.0f);
    std::string result;
    Base64Encoder encoder{new StringSink{result}, false};
    encoder.Put(derived, sizeof(derived));
    encoder.MessageEnd();
    return result;
}

inline std::string encode_password(const std::string& password) {
    std::string salt = generate_random_string(16);
    std::string hashed_password = hash_password(password, salt);
    return salt + '$' + hashed_password;
}

inline bool check_password(const std::string& password,
    const std::string& encoded_password) {
    std::string salt, hashed_password;
    std::string* a = &salt, * b = &hashed_password;
    for (std::size_t i = 0; i < encoded_password.length(); ++i) {
        if (encoded_password[i] != '$') {
            (*a) += encoded_password[i];
        } else {
            std::swap(a, b);
        }
    }
    return constant_time_compare(
        hash_password(password, salt), hashed_password);
}

}  // security

// reference for url:
// https://www.ietf.org/rfc/rfc3986.txt

// reserved    = gen-delims / sub-delims
// gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
// sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
//             / "*" / "+" / "," / ";" / "="

// unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"

// https://stackoverflow.com/questions/54060359/encoding-decoded-urls-in-c
// decodes a percent-encoded string:
// - "%XY", where X and Y are hexadecimal digits (either case), becomes the
//   single byte 0xXY.
// - '+' becomes ' '.
// - every other character is copied unchanged.
// a '%' that is NOT followed by two hexadecimal digits (e.g. "%", "%4",
// "%zz") is copied literally instead of being decoded. consequently this
// function does not throw on malformed input; the previous implementation
// relied on `std::stoi`, which threw on such input and silently accepted
// signs, whitespace and one-digit escapes.
inline std::string decode_url(const std::string& s) {
    // value of a single hexadecimal digit, or -1 if `c` is not one.
    const auto hex_value = [](char c) -> int {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    const std::size_t n = s.length();
    std::string r;
    r.reserve(n);  // the result is never longer than the input.
    for (std::size_t i = 0; i < n; ++i) {
        const char c = s[i];
        // only treat '%' as an escape when two more characters exist.
        if (c == '%' && n - i > 2) {
            const int hi = hex_value(s[i + 1]);
            const int lo = hex_value(s[i + 2]);
            if (hi >= 0 && lo >= 0) {
                r.push_back(static_cast<char>((hi << 4) | lo));
                i += 2;  // skip the two digits just consumed.
                continue;
            }
        }
        r.push_back(c == '+' ? ' ' : c);
    }
    return r;
}

// percent-encodes `s`: characters found in `internal::url_safe_characters`
// are copied as-is; every other character is written as '%' followed by
// its byte value as two uppercase hexadecimal digits.
inline std::string encode_url(const std::string& s) {
    static constexpr char hex_digits[] = "0123456789ABCDEF";

    std::string encoded;
    for (const char ch : s) {
        if (internal::url_safe_characters.find(ch) == std::string::npos) {
            // mask to 0..255 so that negative `char` values encode as one byte.
            const unsigned int code = 0xff & ch;
            encoded += '%';
            encoded += hex_digits[code >> 4];
            encoded += hex_digits[code & 0x0f];
        } else {
            encoded += ch;
        }
    }
    return encoded;
}

// this function parses a param list in the form of k1=v1&k2=v2...,
// where '&' can be any `delimiter`, starting at index `start_pos` of `s`.
// ki and vi are passed through `decode_url`,
// which is why the returned values are `string`, not `string_view`.
//
// rules:
// - within one pair, only the FIRST '=' separates key from value; any
//   further '=' belongs to the value (so "t=abc==" yields value "abc==").
// - leading and trailing ' ' are stripped from key and value
//   (before decoding).
// - a pair whose key and value are both empty is skipped.
//
// returns {dict_params, list_params}:
// - a key seen once is stored in `dict_params`.
// - a key seen more than once is stored in `list_params` with all of its
//   values in order of appearance (and is removed from `dict_params`).
//
// `s` is NOT modified: the end of the string is treated as a final
// delimiter instead of temporarily appending one, so the caller's buffer
// is never reallocated and stays intact even if decoding throws.
// the parameter remains a non-const reference for compatibility with
// existing callers.
inline
std::pair<
    std::map<std::string, std::string>,
    std::map<std::string, std::vector<std::string>>>
parse_params(std::string& s, std::size_t start_pos = 0, char delimiter = '&') {
    std::map<std::string, std::string> dict_params;
    std::map<std::string, std::vector<std::string>> list_params;

    const auto strip_trailing_spaces = [](std::string& text) {
        while (!text.empty() && text.back() == ' ') text.pop_back();
    };

    std::string key, value;
    // false while reading the key, true once the first '=' has been seen.
    bool in_value = false;

    const std::size_t length = s.length();
    // `i == length` is a virtual trailing delimiter, so that the last
    // key-value pair is processed just like the others.
    for (std::size_t i = start_pos; i <= length; ++i) {
        const bool at_end = (i == length);
        if (!at_end && s[i] == '=' && !in_value) {
            in_value = true;
        } else if (at_end || s[i] == delimiter) {
            in_value = false;
            // prevent ending with ' '
            strip_trailing_spaces(key);
            strip_trailing_spaces(value);
            if (!(key.empty() && value.empty())) {
                std::string decoded_key = decode_url(key);
                std::string decoded_value = decode_url(value);
                const auto list_it = list_params.find(decoded_key);
                if (list_it != list_params.end()) {
                    // key already has multiple values: append.
                    list_it->second.push_back(std::move(decoded_value));
                } else {
                    const auto dict_it = dict_params.find(decoded_key);
                    if (dict_it != dict_params.end()) {
                        // second occurrence: move the previous value and
                        // the new one to `list_params`.
                        auto& values = list_params[decoded_key];
                        values.push_back(std::move(dict_it->second));
                        values.push_back(std::move(decoded_value));
                        dict_params.erase(dict_it);
                    } else {
                        // first occurrence of this key.
                        dict_params.emplace(std::move(decoded_key),
                                            std::move(decoded_value));
                    }
                }
            }
            key.clear();
            value.clear();
        } else {
            std::string& target = in_value ? value : key;
            // prevent beginning with ' '
            if (target.empty() && s[i] == ' ') {
                continue;
            }
            target += s[i];
        }
    }
    return std::make_pair(std::move(dict_params), std::move(list_params));
}

// this function parses url in the form of [url]?k1=v1&k2=v2...
// this function will convert ki and vi if they are percent-encoded.
// NOTE: don't misuse this function, it's going to modify
//       the parameter `s` in place!
inline
std::tuple<std::string,
           std::map<std::string, std::string>,
           std::map<std::string, std::vector<std::string>>>
parse_url(std::string& s) {
    std::string url;
    std::size_t i = 0;
    for (; i < s.length(); ++i) {
        if (s[i] != '?') {
            url += s[i];
        } else {
            break;
        }
    }
    if (i == s.length())
        return std::make_tuple(url,
            std::map<std::string, std::string>{},
            std::map<std::string, std::vector<std::string>>{});
    auto&& [dict_params, list_params] = parse_params(s, i + 1);
    return std::make_tuple(url, dict_params, list_params);
}

}  // bserv::utils

#endif  // _UTILS_HPP
initial commit 2021-03-05 07:39:47 +00:00			`#ifndef _UTILS_HPP`
			`#define _UTILS_HPP`

			`#include <cstddef>`
			`#include <string>`
			`#include <tuple>`
			`#include <vector>`
			`#include <map>`
			`#include <random>`
			`#include <mutex>`
add request 2021-03-13 11:33:01 +00:00			`#include <sstream>`
			`#include <iomanip>`
initial commit 2021-03-05 07:39:47 +00:00
			`#include <cryptopp/cryptlib.h>`
			`#include <cryptopp/pwdbased.h>`
			`#include <cryptopp/sha.h>`
			`#include <cryptopp/base64.h>`

			`namespace bserv::utils {`

			`namespace internal {`

			`// NOTE:`
			// - `random_device` is implementation dependent.
			`// it doesn't work with GNU GCC on Windows.`
			`// - for thread-safety, do not directly use it.`
			// use `get_rd_value` instead.
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::random_device rd;`
			`inline std::mutex rd_mutex;`
initial commit 2021-03-05 07:39:47 +00:00
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline auto get_rd_value() {`
initial commit 2021-03-05 07:39:47 +00:00			`std::lock_guard<std::mutex> lg{rd_mutex};`
			`return rd();`
			`}`

			`// const std::string chars = "abcdefghijklmnopqrstuvwxyz"`
			`// "ABCDEFGHIJKLMNOPQRSTUVWXYZ"`
			`// "1234567890"`
			`// "!@#$%^&*()"`
			// "`~-_=+[{]}\\\|;:'\",<.>/? ";

			`const std::string chars = "abcdefghijklmnopqrstuvwxyz"`
			`"ABCDEFGHIJKLMNOPQRSTUVWXYZ"`
			`"1234567890";`

add request 2021-03-13 11:33:01 +00:00
			`const std::string url_safe_characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"`
			`"abcdefghijklmnopqrstuvwxyz"`
			`"0123456789-._~";`

initial commit 2021-03-05 07:39:47 +00:00			`} // internal`

			`// https://www.boost.org/doc/libs/1_75_0/libs/random/example/password.cpp`
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::string generate_random_string(std::size_t len) {`
initial commit 2021-03-05 07:39:47 +00:00			`std::string s;`
			`std::mt19937 rng{internal::get_rd_value()};`
			`std::uniform_int_distribution<> dist{0, (int) internal::chars.length() - 1};`
			`for (std::size_t i = 0; i < len; ++i) s += internal::chars[dist(rng)];`
			`return s;`
			`}`

			`namespace security {`

			`// https://codahale.com/a-lesson-in-timing-attacks/`
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline bool constant_time_compare(const std::string& a, const std::string& b) {`
initial commit 2021-03-05 07:39:47 +00:00			`if (a.length() != b.length())`
			`return false;`
			`int result = 0;`
			`for (std::size_t i = 0; i < a.length(); ++i)`
			`result \|= a[i] ^ b[i];`
			`return result == 0;`
			`}`

			`// https://cryptopp.com/wiki/PKCS5_PBKDF2_HMAC`
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::string hash_password(`
initial commit 2021-03-05 07:39:47 +00:00			`const std::string& password,`
			`const std::string& salt,`
			`unsigned int iterations = 20000 /320000/) {`
			`using namespace CryptoPP;`
			`byte derived[SHA256::DIGESTSIZE];`
			`PKCS5_PBKDF2_HMAC<SHA256> pbkdf;`
			`byte unused = 0;`
			`pbkdf.DeriveKey(derived, sizeof(derived), unused,`
			`(const byte*) password.c_str(), password.length(),`
			`(const byte*) salt.c_str(), salt.length(),`
			`iterations, 0.0f);`
			`std::string result;`
			`Base64Encoder encoder{new StringSink{result}, false};`
			`encoder.Put(derived, sizeof(derived));`
			`encoder.MessageEnd();`
			`return result;`
			`}`

add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::string encode_password(const std::string& password) {`
initial commit 2021-03-05 07:39:47 +00:00			`std::string salt = generate_random_string(16);`
			`std::string hashed_password = hash_password(password, salt);`
			`return salt + '$' + hashed_password;`
			`}`

add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline bool check_password(const std::string& password,`
initial commit 2021-03-05 07:39:47 +00:00			`const std::string& encoded_password) {`
			`std::string salt, hashed_password;`
			`std::string* a = &salt, * b = &hashed_password;`
			`for (std::size_t i = 0; i < encoded_password.length(); ++i) {`
			`if (encoded_password[i] != '$') {`
			`(*a) += encoded_password[i];`
			`} else {`
			`std::swap(a, b);`
			`}`
			`}`
			`return constant_time_compare(`
			`hash_password(password, salt), hashed_password);`
			`}`

			`} // security`

			`// reference for url:`
			`// https://www.ietf.org/rfc/rfc3986.txt`

			`// reserved = gen-delims / sub-delims`
			`// gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
			`// sub-delims = "!" / "$" / "&" / "'" / "(" / ")"`
			`// / "*" / "+" / "," / ";" / "="`

			`// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`

			`// https://stackoverflow.com/questions/54060359/encoding-decoded-urls-in-c`
			`// there can be exceptions (std::stoi)!`
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::string decode_url(const std::string& s) {`
initial commit 2021-03-05 07:39:47 +00:00			`std::string r;`
			`for (std::size_t i = 0; i < s.length(); ++i) {`
			`if (s[i] == '%') {`
			`int v = std::stoi(s.substr(i + 1, 2), nullptr, 16);`
			`r.push_back(0xff & v);`
			`i += 2;`
			`} else if (s[i] == '+') r.push_back(' ');`
			`else r.push_back(s[i]);`
			`}`
			`return r;`
			`}`

add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline std::string encode_url(const std::string& s) {`
add request 2021-03-13 11:33:01 +00:00			`std::ostringstream oss;`
			`for (auto& c : s) {`
			`if (internal::url_safe_characters.find(c) != std::string::npos) {`
			`oss << c;`
			`} else {`
			`oss << '%' << std::setfill('0') << std::setw(2) <<`
			`std::uppercase << std::hex << (0xff & c);`
			`}`
			`}`
			`return oss.str();`
			`}`

initial commit 2021-03-05 07:39:47 +00:00			`// this function parses param list in the form of k1=v1&k2=v2...,`
			`// where '&' can be any delimiter.`
			`// ki and vi will be converted if they are percent-encoded,`
			// which is why the returned values are `string`, not `string_view`.
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline`
initial commit 2021-03-05 07:39:47 +00:00			`std::pair<`
			`std::map<std::string, std::string>,`
			`std::map<std::string, std::vector<std::string>>>`
			`parse_params(std::string& s, std::size_t start_pos = 0, char delimiter = '&') {`
			`std::map<std::string, std::string> dict_params;`
			`std::map<std::string, std::vector<std::string>> list_params;`
			`// we use the swap pointer technique`
			`// we will always append characters to *a only.`
			`std::string key, value, a = &key, b = &value;`
			// append an extra `delimiter` so that the last key-value pair
			`// is processed just like the other.`
			`s.push_back(delimiter);`
			`for (std::size_t i = start_pos; i < s.length(); ++i) {`
			`if (s[i] == '=') {`
			`std::swap(a, b);`
			`} else if (s[i] == delimiter) {`
			`// swap(a, b);`
			`a = &key;`
			`b = &value;`
			`// prevent ending with ' '`
			`while (!key.empty() && key.back() == ' ') key.pop_back();`
			`while (!value.empty() && value.back() == ' ') value.pop_back();`
			`if (key.empty() && value.empty())`
			`continue;`
			`key = decode_url(key);`
			`value = decode_url(value);`
			// if `key` is in `list_params`, append `value`.
			`auto p = list_params.find(key);`
			`if (p != list_params.end()) {`
			`list_params[key].push_back(value);`
			} else { // `key` is not in `list_params`
			`auto p = dict_params.find(key);`
			// if `key` is in `dict_params`,
			// move previous value and `value` to `list_params`
			// and remove `key` in `dict_params`.
			`if (p != dict_params.end()) {`
			`list_params[key] = {p->second, value};`
			`dict_params.erase(p);`
			} else { // `key` is not in `dict_params`
			`dict_params[key] = value;`
			`}`
			`}`
			// clear `key` and `value`
			`key = "";`
			`value = "";`
			`} else {`
			`// prevent beginning with ' '`
			`if (a->empty() && s[i] == ' ') {`
			`continue;`
			`}`
			`(*a) += s[i];`
			`}`
			`}`
			// remove the last `delimiter` to restore `s` to what it was.
			`s.pop_back();`
			`return std::make_pair(dict_params, list_params);`
			`}`

			`// this function parses url in the form of [url]?k1=v1&k2=v2...`
			`// this function will convert ki and vi if they are percent-encoded.`
			`// NOTE: don't misuse this function, it's going to modify`
			// the parameter `s` in place!
add websocket support & replace promise/future with coroutine 2021-08-07 13:56:19 +01:00			`inline`
initial commit 2021-03-05 07:39:47 +00:00			`std::tuple<std::string,`
			`std::map<std::string, std::string>,`
			`std::map<std::string, std::vector<std::string>>>`
			`parse_url(std::string& s) {`
			`std::string url;`
			`std::size_t i = 0;`
			`for (; i < s.length(); ++i) {`
			`if (s[i] != '?') {`
			`url += s[i];`
			`} else {`
			`break;`
			`}`
			`}`
			`if (i == s.length())`
			`return std::make_tuple(url,`
			`std::map<std::string, std::string>{},`
			`std::map<std::string, std::vector<std::string>>{});`
			`auto&& [dict_params, list_params] = parse_params(s, i + 1);`
			`return std::make_tuple(url, dict_params, list_params);`
			`}`

			`} // bserv::utils`

			`#endif // _UTILS_HPP`