From a0e66fb9e6c5c174ecf556bce9ef38e49d1abc30 Mon Sep 17 00:00:00 2001 From: Sergey Lyubka Date: Mon, 18 Dec 2023 19:08:51 +0000 Subject: [PATCH] Add built-in TLS 1.3 stack: server side, EC support --- Makefile | 3 +- examples/device-dashboard/Makefile | 2 + examples/device-dashboard/net.c | 12 +- examples/device-dashboard/packed_fs.c | 4 +- examples/rp2040/pico-rndis-device/main.c | 13 +- .../Makefile | 1 + .../main.c | 3 +- mongoose.c | 5936 +++++++++++++++-- mongoose.h | 1067 ++- src/base64.c | 10 +- src/http.c | 5 +- src/ota.h | 2 +- src/sha256.c | 160 + src/sha256.h | 16 + src/tls_aes128.c | 1002 +++ src/tls_aes128.h | 263 + src/tls_builtin.c | 1010 ++- src/tls_uecc.c | 3173 +++++++++ src/tls_uecc.h | 638 ++ 19 files changed, 12683 insertions(+), 637 deletions(-) create mode 100644 src/sha256.c create mode 100644 src/sha256.h create mode 100644 src/tls_aes128.c create mode 100644 src/tls_aes128.h create mode 100644 src/tls_uecc.c create mode 100644 src/tls_uecc.h diff --git a/Makefile b/Makefile index ea24e1c8..818a42cc 100644 --- a/Makefile +++ b/Makefile @@ -177,7 +177,8 @@ mongoose.c: Makefile $(wildcard src/*.c) $(wildcard src/drivers/*.c) (cat src/license.h; echo; echo '#include "mongoose.h"' ; (for F in src/*.c src/drivers/*.c ; do echo; echo '#ifdef MG_ENABLE_LINES'; echo "#line 1 \"$$F\""; echo '#endif'; cat $$F | sed -e 's,#include ".*,,'; done))> $@ mongoose.h: $(HDRS) Makefile - (cat src/license.h; echo; echo '#ifndef MONGOOSE_H'; echo '#define MONGOOSE_H'; echo; cat src/version.h ; echo; echo '#ifdef __cplusplus'; echo 'extern "C" {'; echo '#endif'; cat src/arch.h src/arch_*.h src/net_ft.h src/net_lwip.h src/net_rl.h src/config.h src/str.h src/queue.h src/fmt.h src/printf.h src/log.h src/timer.h src/fs.h src/util.h src/url.h src/iobuf.h src/base64.h src/md5.h src/sha1.h src/event.h src/net.h src/profile.h src/http.h src/ssi.h src/tls.h src/tls_mbed.h src/tls_openssl.h src/ws.h src/sntp.h src/mqtt.h src/dns.h src/json.h src/rpc.h src/ota.h 
src/device.h src/net_builtin.h src/drivers/*.h | sed -e '/keep/! s,#include ".*,,' -e 's,^#pragma once,,'; echo; echo '#ifdef __cplusplus'; echo '}'; echo '#endif'; echo '#endif // MONGOOSE_H')> $@ + (cat src/license.h; echo; echo '#ifndef MONGOOSE_H'; echo '#define MONGOOSE_H'; echo; cat src/version.h ; echo; echo '#ifdef __cplusplus'; echo 'extern "C" {'; echo '#endif'; cat src/arch.h src/arch_*.h src/net_ft.h src/net_lwip.h src/net_rl.h src/config.h src/str.h src/queue.h src/fmt.h src/printf.h src/log.h src/timer.h src/fs.h src/util.h src/url.h src/iobuf.h src/base64.h src/md5.h src/sha1.h src/sha256.h src/tls_aes128.h src/tls_uecc.h src/event.h src/net.h src/http.h src/ssi.h src/tls.h src/tls_mbed.h src/tls_openssl.h src/ws.h src/sntp.h src/mqtt.h src/dns.h src/json.h src/rpc.h src/ota.h src/device.h src/net_builtin.h src/profile.h src/drivers/*.h | sed -e '/keep/! s,#include ".*,,' -e 's,^#pragma once,,'; echo; echo '#ifdef __cplusplus'; echo '}'; echo '#endif'; echo '#endif // MONGOOSE_H')> $@ + clean: clean_examples clean_embedded rm -rf $(PROG) *.exe *.o *.dSYM *_test* ut fuzzer *.gcov *.gcno *.gcda *.obj *.exe *.ilk *.pdb slow-unit* _CL_* infer-out data.txt crash-* test/packed_fs.c pack diff --git a/examples/device-dashboard/Makefile b/examples/device-dashboard/Makefile index 64c3f27b..a3a8fa91 100644 --- a/examples/device-dashboard/Makefile +++ b/examples/device-dashboard/Makefile @@ -49,6 +49,8 @@ ifeq ($(TLS), mbedtls) CFLAGS += -DMG_TLS=MG_TLS_MBED -Wno-conversion -Imbedtls/include CFLAGS += -DMBEDTLS_CONFIG_FILE=\"mbedtls_config.h\" mbedtls/library/*.c $(PROG): mbedtls +else +CFLAGS += -DMG_TLS=MG_TLS_BUILTIN endif # Cleanup. 
Delete built program and all build artifacts diff --git a/examples/device-dashboard/net.c b/examples/device-dashboard/net.c index a180af13..91d29511 100644 --- a/examples/device-dashboard/net.c +++ b/examples/device-dashboard/net.c @@ -123,9 +123,9 @@ static void handle_debug(struct mg_connection *c, struct mg_http_message *hm) { } static size_t print_int_arr(void (*out)(char, void *), void *ptr, va_list *ap) { - size_t len = 0, num = va_arg(*ap, size_t); // Number of items in the array + size_t i, len = 0, num = va_arg(*ap, size_t); // Number of items in the array int *arr = va_arg(*ap, int *); // Array ptr - for (size_t i = 0; i < num; i++) { + for (i = 0; i < num; i++) { len += mg_xprintf(out, ptr, "%s%d", i == 0 ? "" : ",", arr[i]); } return len; @@ -168,21 +168,19 @@ static void handle_events_get(struct mg_connection *c, static void handle_settings_set(struct mg_connection *c, struct mg_str body) { struct settings settings; + char *s = mg_json_get_str(body, "$.device_name"); + bool ok = true; memset(&settings, 0, sizeof(settings)); mg_json_get_bool(body, "$.log_enabled", &settings.log_enabled); settings.log_level = mg_json_get_long(body, "$.log_level", 0); settings.brightness = mg_json_get_long(body, "$.brightness", 0); - char *s = mg_json_get_str(body, "$.device_name"); if (s && strlen(s) < MAX_DEVICE_NAME) { free(settings.device_name); settings.device_name = s; } else { free(s); } - - // Save to the device flash - s_settings = settings; - bool ok = true; + s_settings = settings; // Save to the device flash mg_http_reply(c, 200, s_json_header, "{%m:%s,%m:%m}", // MG_ESC("status"), ok ? "true" : "false", // diff --git a/examples/device-dashboard/packed_fs.c b/examples/device-dashboard/packed_fs.c index 866108f7..0ed552b8 100644 --- a/examples/device-dashboard/packed_fs.c +++ b/examples/device-dashboard/packed_fs.c @@ -652,7 +652,7 @@ static const unsigned char v1[] = { 0, 0 // . 
}; static const unsigned char v2[] = { - 31, 139, 8, 8, 246, 5, 126, 101, 0, 3, 99, 111, // ......~e..co + 31, 139, 8, 8, 87, 102, 129, 101, 0, 3, 99, 111, // ....Wf.e..co 109, 112, 111, 110, 101, 110, 116, 115, 46, 106, 115, 0, // mponents.js. 237, 93, 235, 115, 219, 70, 146, 255, 238, 191, 98, 162, // .].s.F....b. 242, 45, 169, 181, 0, 225, 77, 64, 182, 148, 114, 156, // .-....M@..r. @@ -2668,7 +2668,7 @@ static const struct packed_file { time_t mtime; } packed_files[] = { {"/web_root/bundle.js.gz", v1, sizeof(v1), 1695912421}, - {"/web_root/components.js.gz", v2, sizeof(v2), 1702757878}, + {"/web_root/components.js.gz", v2, sizeof(v2), 1702979159}, {"/web_root/history.min.js.gz", v3, sizeof(v3), 1695912421}, {"/web_root/index.html.gz", v4, sizeof(v4), 1693654553}, {"/web_root/main.css.gz", v5, sizeof(v5), 1702757929}, diff --git a/examples/rp2040/pico-rndis-device/main.c b/examples/rp2040/pico-rndis-device/main.c index 08de97cc..ae38df94 100644 --- a/examples/rp2040/pico-rndis-device/main.c +++ b/examples/rp2040/pico-rndis-device/main.c @@ -10,8 +10,13 @@ bool hal_gpio_read(int pin) { return (pin >= 0 && pin <= 29) ? 
gpio_get_out_level((uint) pin) : false; } -void hal_gpio_write(int pin, bool val) { - if (pin >= 0 && pin <= 29) gpio_put((uint) pin, val); +bool hal_gpio_write(int pin, bool val) { + if (pin >= 0 && pin <= 29) { + gpio_put((uint) pin, val); + return true; + } else { + return false; + } } int hal_led_pin(void) { @@ -56,6 +61,10 @@ static void fn(struct mg_connection *c, int ev, void *ev_data, void *fn_dta) { if (ev == MG_EV_HTTP_MSG) return mg_http_reply(c, 200, "", "ok\n"); } +uint64_t mg_now(void) { + return mg_millis(); +} + int main(void) { gpio_init(PICO_DEFAULT_LED_PIN); gpio_set_dir(PICO_DEFAULT_LED_PIN, GPIO_OUT); diff --git a/examples/stm32/nucleo-f746zg-make-baremetal-builtin/Makefile b/examples/stm32/nucleo-f746zg-make-baremetal-builtin/Makefile index 28a3b61b..8974c42e 100644 --- a/examples/stm32/nucleo-f746zg-make-baremetal-builtin/Makefile +++ b/examples/stm32/nucleo-f746zg-make-baremetal-builtin/Makefile @@ -13,6 +13,7 @@ SOURCES += mongoose.c net.c packed_fs.c # Example specific build options. 
See README.md CFLAGS += -DHTTP_URL=\"http://0.0.0.0/\" -DHTTPS_URL=\"https://0.0.0.0/\" +CFLAGS += -DMG_TLS=MG_TLS_BUILTIN ifeq ($(OS),Windows_NT) RM = cmd /C del /Q /F /S diff --git a/examples/stm32/nucleo-h723zg-make-baremetal-builtin/main.c b/examples/stm32/nucleo-h723zg-make-baremetal-builtin/main.c index d50ecd1a..d31afea5 100644 --- a/examples/stm32/nucleo-h723zg-make-baremetal-builtin/main.c +++ b/examples/stm32/nucleo-h723zg-make-baremetal-builtin/main.c @@ -24,8 +24,9 @@ void mg_random(void *buf, size_t len) { // Use on-board RNG } #ifdef MQTT_DASHBOARD -void hal_gpio_write(int pin, bool status) { // For MQTT dashboard HAL +bool hal_gpio_write(int pin, bool status) { // For MQTT dashboard HAL gpio_write((uint16_t) pin, status); + return true; } bool hal_gpio_read(int pin) { // For MQTT dashboard HAL diff --git a/mongoose.c b/mongoose.c index e5a9bfa9..e1aaf3ac 100644 --- a/mongoose.c +++ b/mongoose.c @@ -93,14 +93,15 @@ size_t mg_base64_encode(const unsigned char *p, size_t n, char *to, size_t dl) { size_t mg_base64_decode(const char *src, size_t n, char *dst, size_t dl) { const char *end = src == NULL ? 
NULL : src + n; // Cannot add to NULL size_t len = 0; - if (dl > 0) dst[0] = '\0'; - if (dl < n / 4 * 3 + 1) return 0; + if (dl < n / 4 * 3 + 1) goto fail; while (src != NULL && src + 3 < end) { int a = mg_base64_decode_single(src[0]), b = mg_base64_decode_single(src[1]), c = mg_base64_decode_single(src[2]), d = mg_base64_decode_single(src[3]); - if (a == 64 || a < 0 || b == 64 || b < 0 || c < 0 || d < 0) return 0; + if (a == 64 || a < 0 || b == 64 || b < 0 || c < 0 || d < 0) { + goto fail; + } dst[len++] = (char) ((a << 2) | (b >> 4)); if (src[2] != '=') { dst[len++] = (char) ((b << 4) | (c >> 2)); @@ -110,6 +111,9 @@ size_t mg_base64_decode(const char *src, size_t n, char *dst, size_t dl) { } dst[len] = '\0'; return len; +fail: + if (dl > 0) dst[0] = '\0'; + return 0; } #ifdef MG_ENABLE_LINES @@ -2050,7 +2054,7 @@ static bool vcb(uint8_t c) { // Get character length (valid utf-8). Used to parse method, URI, headers static size_t clen(const char *s, const char *end) { const unsigned char *u = (unsigned char *) s, c = *u; - long n = end - s; + long n = (long) (end - s); if (c > ' ' && c < '~') return 1; // Usual ascii printed char if ((c & 0xe0) == 0xc0 && n > 1 && vcb(u[1])) return 2; // 2-byte UTF8 if ((c & 0xf0) == 0xe0 && n > 2 && vcb(u[1]) && vcb(u[2])) return 3; @@ -2832,7 +2836,8 @@ static void http_cb(struct mg_connection *c, int ev, void *evd, void *fnd) { struct mg_str *te; // Transfer - encoding header bool is_chunked = false; if (n < 0) { - mg_error(c, "HTTP parse"); + mg_error(c, "HTTP parse, %lu bytes", c->recv.len); + mg_hexdump(c->recv.buf, c->recv.len > 16 ? 
16 : c->recv.len); return; } if (n == 0) break; // Request is not buffered yet @@ -4183,6 +4188,288 @@ struct mg_connection *mg_mqtt_listen(struct mg_mgr *mgr, const char *url, return c; } +#ifdef MG_ENABLE_LINES +#line 1 "src/net.c" +#endif + + + + + + + + + +size_t mg_vprintf(struct mg_connection *c, const char *fmt, va_list *ap) { + size_t old = c->send.len; + mg_vxprintf(mg_pfn_iobuf, &c->send, fmt, ap); + return c->send.len - old; +} + +size_t mg_printf(struct mg_connection *c, const char *fmt, ...) { + size_t len = 0; + va_list ap; + va_start(ap, fmt); + len = mg_vprintf(c, fmt, &ap); + va_end(ap); + return len; +} + +static bool mg_atonl(struct mg_str str, struct mg_addr *addr) { + uint32_t localhost = mg_htonl(0x7f000001); + if (mg_vcasecmp(&str, "localhost") != 0) return false; + memcpy(addr->ip, &localhost, sizeof(uint32_t)); + addr->is_ip6 = false; + return true; +} + +static bool mg_atone(struct mg_str str, struct mg_addr *addr) { + if (str.len > 0) return false; + memset(addr->ip, 0, sizeof(addr->ip)); + addr->is_ip6 = false; + return true; +} + +static bool mg_aton4(struct mg_str str, struct mg_addr *addr) { + uint8_t data[4] = {0, 0, 0, 0}; + size_t i, num_dots = 0; + for (i = 0; i < str.len; i++) { + if (str.ptr[i] >= '0' && str.ptr[i] <= '9') { + int octet = data[num_dots] * 10 + (str.ptr[i] - '0'); + if (octet > 255) return false; + data[num_dots] = (uint8_t) octet; + } else if (str.ptr[i] == '.') { + if (num_dots >= 3 || i == 0 || str.ptr[i - 1] == '.') return false; + num_dots++; + } else { + return false; + } + } + if (num_dots != 3 || str.ptr[i - 1] == '.') return false; + memcpy(&addr->ip, data, sizeof(data)); + addr->is_ip6 = false; + return true; +} + +static bool mg_v4mapped(struct mg_str str, struct mg_addr *addr) { + int i; + uint32_t ipv4; + if (str.len < 14) return false; + if (str.ptr[0] != ':' || str.ptr[1] != ':' || str.ptr[6] != ':') return false; + for (i = 2; i < 6; i++) { + if (str.ptr[i] != 'f' && str.ptr[i] != 'F') return 
false; + } + // struct mg_str s = mg_str_n(&str.ptr[7], str.len - 7); + if (!mg_aton4(mg_str_n(&str.ptr[7], str.len - 7), addr)) return false; + memcpy(&ipv4, addr->ip, sizeof(ipv4)); + memset(addr->ip, 0, sizeof(addr->ip)); + addr->ip[10] = addr->ip[11] = 255; + memcpy(&addr->ip[12], &ipv4, 4); + addr->is_ip6 = true; + return true; +} + +static bool mg_aton6(struct mg_str str, struct mg_addr *addr) { + size_t i, j = 0, n = 0, dc = 42; + addr->scope_id = 0; + if (str.len > 2 && str.ptr[0] == '[') str.ptr++, str.len -= 2; + if (mg_v4mapped(str, addr)) return true; + for (i = 0; i < str.len; i++) { + if ((str.ptr[i] >= '0' && str.ptr[i] <= '9') || + (str.ptr[i] >= 'a' && str.ptr[i] <= 'f') || + (str.ptr[i] >= 'A' && str.ptr[i] <= 'F')) { + unsigned long val; + if (i > j + 3) return false; + // MG_DEBUG(("%lu %lu [%.*s]", i, j, (int) (i - j + 1), &str.ptr[j])); + val = mg_unhexn(&str.ptr[j], i - j + 1); + addr->ip[n] = (uint8_t) ((val >> 8) & 255); + addr->ip[n + 1] = (uint8_t) (val & 255); + } else if (str.ptr[i] == ':') { + j = i + 1; + if (i > 0 && str.ptr[i - 1] == ':') { + dc = n; // Double colon + if (i > 1 && str.ptr[i - 2] == ':') return false; + } else if (i > 0) { + n += 2; + } + if (n > 14) return false; + addr->ip[n] = addr->ip[n + 1] = 0; // For trailing :: + } else if (str.ptr[i] == '%') { // Scope ID + for (i = i + 1; i < str.len; i++) { + if (str.ptr[i] < '0' || str.ptr[i] > '9') return false; + addr->scope_id = (uint8_t) (addr->scope_id * 10); + addr->scope_id = (uint8_t) (addr->scope_id + (str.ptr[i] - '0')); + } + } else { + return false; + } + } + if (n < 14 && dc == 42) return false; + if (n < 14) { + memmove(&addr->ip[dc + (14 - n)], &addr->ip[dc], n - dc + 2); + memset(&addr->ip[dc], 0, 14 - n); + } + + addr->is_ip6 = true; + return true; +} + +bool mg_aton(struct mg_str str, struct mg_addr *addr) { + // MG_INFO(("[%.*s]", (int) str.len, str.ptr)); + return mg_atone(str, addr) || mg_atonl(str, addr) || mg_aton4(str, addr) || + mg_aton6(str, 
addr); +} + +struct mg_connection *mg_alloc_conn(struct mg_mgr *mgr) { + struct mg_connection *c = + (struct mg_connection *) calloc(1, sizeof(*c) + mgr->extraconnsize); + if (c != NULL) { + c->mgr = mgr; + c->send.align = c->recv.align = c->rtls.align = MG_IO_SIZE; + c->id = ++mgr->nextid; + MG_PROF_INIT(c); + } + return c; +} + +void mg_close_conn(struct mg_connection *c) { + mg_resolve_cancel(c); // Close any pending DNS query + LIST_DELETE(struct mg_connection, &c->mgr->conns, c); + if (c == c->mgr->dns4.c) c->mgr->dns4.c = NULL; + if (c == c->mgr->dns6.c) c->mgr->dns6.c = NULL; + // Order of operations is important. `MG_EV_CLOSE` event must be fired + // before we deallocate received data, see #1331 + mg_call(c, MG_EV_CLOSE, NULL); + MG_DEBUG(("%lu %ld closed", c->id, c->fd)); + MG_PROF_DUMP(c); + MG_PROF_FREE(c); + + mg_tls_free(c); + mg_iobuf_free(&c->recv); + mg_iobuf_free(&c->send); + mg_iobuf_free(&c->rtls); + mg_bzero((unsigned char *) c, sizeof(*c)); + free(c); +} + +struct mg_connection *mg_connect(struct mg_mgr *mgr, const char *url, + mg_event_handler_t fn, void *fn_data) { + struct mg_connection *c = NULL; + if (url == NULL || url[0] == '\0') { + MG_ERROR(("null url")); + } else if ((c = mg_alloc_conn(mgr)) == NULL) { + MG_ERROR(("OOM")); + } else { + LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); + c->is_udp = (strncmp(url, "udp:", 4) == 0); + c->fd = (void *) (size_t) MG_INVALID_SOCKET; + c->fn = fn; + c->is_client = true; + c->fn_data = fn_data; + MG_DEBUG(("%lu %ld %s", c->id, c->fd, url)); + mg_call(c, MG_EV_OPEN, (void *) url); + mg_resolve(c, url); + } + return c; +} + +struct mg_connection *mg_listen(struct mg_mgr *mgr, const char *url, + mg_event_handler_t fn, void *fn_data) { + struct mg_connection *c = NULL; + if ((c = mg_alloc_conn(mgr)) == NULL) { + MG_ERROR(("OOM %s", url)); + } else if (!mg_open_listener(c, url)) { + MG_ERROR(("Failed: %s, errno %d", url, errno)); + MG_PROF_FREE(c); + free(c); + c = NULL; + } else { + 
c->is_listening = 1; + c->is_udp = strncmp(url, "udp:", 4) == 0; + LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); + c->fn = fn; + c->fn_data = fn_data; + mg_call(c, MG_EV_OPEN, NULL); + if (mg_url_is_ssl(url)) c->is_tls = 1; // Accepted connection must + MG_DEBUG(("%lu %ld %s", c->id, c->fd, url)); + } + return c; +} + +struct mg_connection *mg_wrapfd(struct mg_mgr *mgr, int fd, + mg_event_handler_t fn, void *fn_data) { + struct mg_connection *c = mg_alloc_conn(mgr); + if (c != NULL) { + c->fd = (void *) (size_t) fd; + c->fn = fn; + c->fn_data = fn_data; + MG_EPOLL_ADD(c); + mg_call(c, MG_EV_OPEN, NULL); + LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); + } + return c; +} + +struct mg_timer *mg_timer_add(struct mg_mgr *mgr, uint64_t milliseconds, + unsigned flags, void (*fn)(void *), void *arg) { + struct mg_timer *t = (struct mg_timer *) calloc(1, sizeof(*t)); + if (t != NULL) { + mg_timer_init(&mgr->timers, t, milliseconds, flags, fn, arg); + t->id = mgr->timerid++; + } + return t; +} + +long mg_io_recv(struct mg_connection *c, void *buf, size_t len) { + if (c->rtls.len == 0) return MG_IO_WAIT; + if (len > c->rtls.len) len = c->rtls.len; + memcpy(buf, c->rtls.buf, len); + mg_iobuf_del(&c->rtls, 0, len); + return (long) len; +} + +void mg_mgr_free(struct mg_mgr *mgr) { + struct mg_connection *c; + struct mg_timer *tmp, *t = mgr->timers; + while (t != NULL) tmp = t->next, free(t), t = tmp; + mgr->timers = NULL; // Important. 
Next call to poll won't touch timers + for (c = mgr->conns; c != NULL; c = c->next) c->is_closing = 1; + mg_mgr_poll(mgr, 0); +#if MG_ENABLE_FREERTOS_TCP + FreeRTOS_DeleteSocketSet(mgr->ss); +#endif + MG_DEBUG(("All connections closed")); +#if MG_ENABLE_EPOLL + if (mgr->epoll_fd >= 0) close(mgr->epoll_fd), mgr->epoll_fd = -1; +#endif + mg_tls_ctx_free(mgr); +} + +void mg_mgr_init(struct mg_mgr *mgr) { + memset(mgr, 0, sizeof(*mgr)); +#if MG_ENABLE_EPOLL + if ((mgr->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0) + MG_ERROR(("epoll_create1 errno %d", errno)); +#else + mgr->epoll_fd = -1; +#endif +#if MG_ARCH == MG_ARCH_WIN32 && MG_ENABLE_WINSOCK + // clang-format off + { WSADATA data; WSAStartup(MAKEWORD(2, 2), &data); } + // clang-format on +#elif MG_ENABLE_FREERTOS_TCP + mgr->ss = FreeRTOS_CreateSocketSet(); +#elif defined(__unix) || defined(__unix__) || defined(__APPLE__) + // Ignore SIGPIPE signal, so if client cancels the request, it + // won't kill the whole process. + signal(SIGPIPE, SIG_IGN); +#endif + mgr->dnstimeout = 3000; + mgr->dns4.url = "udp://8.8.8.8:53"; + mgr->dns6.url = "udp://[2001:4860:4860::8888]:53"; + mg_tls_ctx_init(mgr); +} + #ifdef MG_ENABLE_LINES #line 1 "src/net_builtin.c" #endif @@ -5292,288 +5579,6 @@ bool mg_send(struct mg_connection *c, const void *buf, size_t len) { } #endif // MG_ENABLE_TCPIP -#ifdef MG_ENABLE_LINES -#line 1 "src/net.c" -#endif - - - - - - - - - -size_t mg_vprintf(struct mg_connection *c, const char *fmt, va_list *ap) { - size_t old = c->send.len; - mg_vxprintf(mg_pfn_iobuf, &c->send, fmt, ap); - return c->send.len - old; -} - -size_t mg_printf(struct mg_connection *c, const char *fmt, ...) 
{ - size_t len = 0; - va_list ap; - va_start(ap, fmt); - len = mg_vprintf(c, fmt, &ap); - va_end(ap); - return len; -} - -static bool mg_atonl(struct mg_str str, struct mg_addr *addr) { - uint32_t localhost = mg_htonl(0x7f000001); - if (mg_vcasecmp(&str, "localhost") != 0) return false; - memcpy(addr->ip, &localhost, sizeof(uint32_t)); - addr->is_ip6 = false; - return true; -} - -static bool mg_atone(struct mg_str str, struct mg_addr *addr) { - if (str.len > 0) return false; - memset(addr->ip, 0, sizeof(addr->ip)); - addr->is_ip6 = false; - return true; -} - -static bool mg_aton4(struct mg_str str, struct mg_addr *addr) { - uint8_t data[4] = {0, 0, 0, 0}; - size_t i, num_dots = 0; - for (i = 0; i < str.len; i++) { - if (str.ptr[i] >= '0' && str.ptr[i] <= '9') { - int octet = data[num_dots] * 10 + (str.ptr[i] - '0'); - if (octet > 255) return false; - data[num_dots] = (uint8_t) octet; - } else if (str.ptr[i] == '.') { - if (num_dots >= 3 || i == 0 || str.ptr[i - 1] == '.') return false; - num_dots++; - } else { - return false; - } - } - if (num_dots != 3 || str.ptr[i - 1] == '.') return false; - memcpy(&addr->ip, data, sizeof(data)); - addr->is_ip6 = false; - return true; -} - -static bool mg_v4mapped(struct mg_str str, struct mg_addr *addr) { - int i; - uint32_t ipv4; - if (str.len < 14) return false; - if (str.ptr[0] != ':' || str.ptr[1] != ':' || str.ptr[6] != ':') return false; - for (i = 2; i < 6; i++) { - if (str.ptr[i] != 'f' && str.ptr[i] != 'F') return false; - } - // struct mg_str s = mg_str_n(&str.ptr[7], str.len - 7); - if (!mg_aton4(mg_str_n(&str.ptr[7], str.len - 7), addr)) return false; - memcpy(&ipv4, addr->ip, sizeof(ipv4)); - memset(addr->ip, 0, sizeof(addr->ip)); - addr->ip[10] = addr->ip[11] = 255; - memcpy(&addr->ip[12], &ipv4, 4); - addr->is_ip6 = true; - return true; -} - -static bool mg_aton6(struct mg_str str, struct mg_addr *addr) { - size_t i, j = 0, n = 0, dc = 42; - addr->scope_id = 0; - if (str.len > 2 && str.ptr[0] == '[') str.ptr++, 
str.len -= 2; - if (mg_v4mapped(str, addr)) return true; - for (i = 0; i < str.len; i++) { - if ((str.ptr[i] >= '0' && str.ptr[i] <= '9') || - (str.ptr[i] >= 'a' && str.ptr[i] <= 'f') || - (str.ptr[i] >= 'A' && str.ptr[i] <= 'F')) { - unsigned long val; - if (i > j + 3) return false; - // MG_DEBUG(("%lu %lu [%.*s]", i, j, (int) (i - j + 1), &str.ptr[j])); - val = mg_unhexn(&str.ptr[j], i - j + 1); - addr->ip[n] = (uint8_t) ((val >> 8) & 255); - addr->ip[n + 1] = (uint8_t) (val & 255); - } else if (str.ptr[i] == ':') { - j = i + 1; - if (i > 0 && str.ptr[i - 1] == ':') { - dc = n; // Double colon - if (i > 1 && str.ptr[i - 2] == ':') return false; - } else if (i > 0) { - n += 2; - } - if (n > 14) return false; - addr->ip[n] = addr->ip[n + 1] = 0; // For trailing :: - } else if (str.ptr[i] == '%') { // Scope ID - for (i = i + 1; i < str.len; i++) { - if (str.ptr[i] < '0' || str.ptr[i] > '9') return false; - addr->scope_id = (uint8_t) (addr->scope_id * 10); - addr->scope_id = (uint8_t) (addr->scope_id + (str.ptr[i] - '0')); - } - } else { - return false; - } - } - if (n < 14 && dc == 42) return false; - if (n < 14) { - memmove(&addr->ip[dc + (14 - n)], &addr->ip[dc], n - dc + 2); - memset(&addr->ip[dc], 0, 14 - n); - } - - addr->is_ip6 = true; - return true; -} - -bool mg_aton(struct mg_str str, struct mg_addr *addr) { - // MG_INFO(("[%.*s]", (int) str.len, str.ptr)); - return mg_atone(str, addr) || mg_atonl(str, addr) || mg_aton4(str, addr) || - mg_aton6(str, addr); -} - -struct mg_connection *mg_alloc_conn(struct mg_mgr *mgr) { - struct mg_connection *c = - (struct mg_connection *) calloc(1, sizeof(*c) + mgr->extraconnsize); - if (c != NULL) { - c->mgr = mgr; - c->send.align = c->recv.align = c->rtls.align = MG_IO_SIZE; - c->id = ++mgr->nextid; - MG_PROF_INIT(c); - } - return c; -} - -void mg_close_conn(struct mg_connection *c) { - mg_resolve_cancel(c); // Close any pending DNS query - LIST_DELETE(struct mg_connection, &c->mgr->conns, c); - if (c == c->mgr->dns4.c) 
c->mgr->dns4.c = NULL; - if (c == c->mgr->dns6.c) c->mgr->dns6.c = NULL; - // Order of operations is important. `MG_EV_CLOSE` event must be fired - // before we deallocate received data, see #1331 - mg_call(c, MG_EV_CLOSE, NULL); - MG_DEBUG(("%lu %ld closed", c->id, c->fd)); - MG_PROF_DUMP(c); - MG_PROF_FREE(c); - - mg_tls_free(c); - mg_iobuf_free(&c->recv); - mg_iobuf_free(&c->send); - mg_iobuf_free(&c->rtls); - mg_bzero((unsigned char *) c, sizeof(*c)); - free(c); -} - -struct mg_connection *mg_connect(struct mg_mgr *mgr, const char *url, - mg_event_handler_t fn, void *fn_data) { - struct mg_connection *c = NULL; - if (url == NULL || url[0] == '\0') { - MG_ERROR(("null url")); - } else if ((c = mg_alloc_conn(mgr)) == NULL) { - MG_ERROR(("OOM")); - } else { - LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); - c->is_udp = (strncmp(url, "udp:", 4) == 0); - c->fd = (void *) (size_t) MG_INVALID_SOCKET; - c->fn = fn; - c->is_client = true; - c->fn_data = fn_data; - MG_DEBUG(("%lu %ld %s", c->id, c->fd, url)); - mg_call(c, MG_EV_OPEN, (void *) url); - mg_resolve(c, url); - } - return c; -} - -struct mg_connection *mg_listen(struct mg_mgr *mgr, const char *url, - mg_event_handler_t fn, void *fn_data) { - struct mg_connection *c = NULL; - if ((c = mg_alloc_conn(mgr)) == NULL) { - MG_ERROR(("OOM %s", url)); - } else if (!mg_open_listener(c, url)) { - MG_ERROR(("Failed: %s, errno %d", url, errno)); - MG_PROF_FREE(c); - free(c); - c = NULL; - } else { - c->is_listening = 1; - c->is_udp = strncmp(url, "udp:", 4) == 0; - LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); - c->fn = fn; - c->fn_data = fn_data; - mg_call(c, MG_EV_OPEN, NULL); - if (mg_url_is_ssl(url)) c->is_tls = 1; // Accepted connection must - MG_DEBUG(("%lu %ld %s", c->id, c->fd, url)); - } - return c; -} - -struct mg_connection *mg_wrapfd(struct mg_mgr *mgr, int fd, - mg_event_handler_t fn, void *fn_data) { - struct mg_connection *c = mg_alloc_conn(mgr); - if (c != NULL) { - c->fd = (void *) (size_t) 
fd; - c->fn = fn; - c->fn_data = fn_data; - MG_EPOLL_ADD(c); - mg_call(c, MG_EV_OPEN, NULL); - LIST_ADD_HEAD(struct mg_connection, &mgr->conns, c); - } - return c; -} - -struct mg_timer *mg_timer_add(struct mg_mgr *mgr, uint64_t milliseconds, - unsigned flags, void (*fn)(void *), void *arg) { - struct mg_timer *t = (struct mg_timer *) calloc(1, sizeof(*t)); - if (t != NULL) { - mg_timer_init(&mgr->timers, t, milliseconds, flags, fn, arg); - t->id = mgr->timerid++; - } - return t; -} - -long mg_io_recv(struct mg_connection *c, void *buf, size_t len) { - if (c->rtls.len == 0) return MG_IO_WAIT; - if (len > c->rtls.len) len = c->rtls.len; - memcpy(buf, c->rtls.buf, len); - mg_iobuf_del(&c->rtls, 0, len); - return (long) len; -} - -void mg_mgr_free(struct mg_mgr *mgr) { - struct mg_connection *c; - struct mg_timer *tmp, *t = mgr->timers; - while (t != NULL) tmp = t->next, free(t), t = tmp; - mgr->timers = NULL; // Important. Next call to poll won't touch timers - for (c = mgr->conns; c != NULL; c = c->next) c->is_closing = 1; - mg_mgr_poll(mgr, 0); -#if MG_ENABLE_FREERTOS_TCP - FreeRTOS_DeleteSocketSet(mgr->ss); -#endif - MG_DEBUG(("All connections closed")); -#if MG_ENABLE_EPOLL - if (mgr->epoll_fd >= 0) close(mgr->epoll_fd), mgr->epoll_fd = -1; -#endif - mg_tls_ctx_free(mgr); -} - -void mg_mgr_init(struct mg_mgr *mgr) { - memset(mgr, 0, sizeof(*mgr)); -#if MG_ENABLE_EPOLL - if ((mgr->epoll_fd = epoll_create1(EPOLL_CLOEXEC)) < 0) - MG_ERROR(("epoll_create1 errno %d", errno)); -#else - mgr->epoll_fd = -1; -#endif -#if MG_ARCH == MG_ARCH_WIN32 && MG_ENABLE_WINSOCK - // clang-format off - { WSADATA data; WSAStartup(MAKEWORD(2, 2), &data); } - // clang-format on -#elif MG_ENABLE_FREERTOS_TCP - mgr->ss = FreeRTOS_CreateSocketSet(); -#elif defined(__unix) || defined(__unix__) || defined(__APPLE__) - // Ignore SIGPIPE signal, so if client cancels the request, it - // won't kill the whole process. 
- signal(SIGPIPE, SIG_IGN); -#endif - mgr->dnstimeout = 3000; - mgr->dns4.url = "udp://8.8.8.8:53"; - mgr->dns6.url = "udp://[2001:4860:4860::8888]:53"; - mg_tls_ctx_init(mgr); -} - #ifdef MG_ENABLE_LINES #line 1 "src/ota_dummy.c" #endif @@ -6412,6 +6417,170 @@ void mg_sha1_final(unsigned char digest[20], mg_sha1_ctx *context) { memset(&finalcount, '\0', sizeof(finalcount)); } +#ifdef MG_ENABLE_LINES +#line 1 "src/sha256.c" +#endif + + +#define ror(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define ep0(x) (ror(x, 2) ^ ror(x, 13) ^ ror(x, 22)) +#define ep1(x) (ror(x, 6) ^ ror(x, 11) ^ ror(x, 25)) +#define sig0(x) (ror(x, 7) ^ ror(x, 18) ^ ((x) >> 3)) +#define sig1(x) (ror(x, 17) ^ ror(x, 19) ^ ((x) >> 10)) + +static const uint32_t mg_sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786, + 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, + 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b, + 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, + 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2}; + +void mg_sha256_init(mg_sha256_ctx *ctx) { + ctx->len = 0; + ctx->bits = 0; + ctx->state[0] = 0x6a09e667; + ctx->state[1] = 0xbb67ae85; + ctx->state[2] = 0x3c6ef372; + ctx->state[3] = 0xa54ff53a; + ctx->state[4] = 0x510e527f; + ctx->state[5] = 0x9b05688c; + ctx->state[6] = 0x1f83d9ab; + ctx->state[7] = 0x5be0cd19; +} + +static 
void mg_sha256_chunk(mg_sha256_ctx *ctx) { + int i, j; + uint32_t a, b, c, d, e, f, g, h; + uint32_t m[64]; + for (i = 0, j = 0; i < 16; ++i, j += 4) + m[i] = (uint32_t) ((ctx->buffer[j] << 24) | (ctx->buffer[j + 1] << 16) | + (ctx->buffer[j + 2] << 8) | (ctx->buffer[j + 3])); + for (; i < 64; ++i) + m[i] = sig1(m[i - 2]) + m[i - 7] + sig0(m[i - 15]) + m[i - 16]; + + a = ctx->state[0]; + b = ctx->state[1]; + c = ctx->state[2]; + d = ctx->state[3]; + e = ctx->state[4]; + f = ctx->state[5]; + g = ctx->state[6]; + h = ctx->state[7]; + + for (i = 0; i < 64; ++i) { + uint32_t t1 = h + ep1(e) + ch(e, f, g) + mg_sha256_k[i] + m[i]; + uint32_t t2 = ep0(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = d + t1; + d = c; + c = b; + b = a; + a = t1 + t2; + } + + ctx->state[0] += a; + ctx->state[1] += b; + ctx->state[2] += c; + ctx->state[3] += d; + ctx->state[4] += e; + ctx->state[5] += f; + ctx->state[6] += g; + ctx->state[7] += h; +} + +void mg_sha256_update(mg_sha256_ctx *ctx, const unsigned char *data, + size_t len) { + size_t i; + for (i = 0; i < len; i++) { + ctx->buffer[ctx->len] = data[i]; + if ((++ctx->len) == 64) { + mg_sha256_chunk(ctx); + ctx->bits += 512; + ctx->len = 0; + } + } +} + +// TODO: make final reusable (remove side effects) +void mg_sha256_final(unsigned char digest[32], mg_sha256_ctx *ctx) { + uint32_t i = ctx->len; + if (i < 56) { + ctx->buffer[i++] = 0x80; + while (i < 56) { + ctx->buffer[i++] = 0x00; + } + } else { + ctx->buffer[i++] = 0x80; + while (i < 64) { + ctx->buffer[i++] = 0x00; + } + mg_sha256_chunk(ctx); + memset(ctx->buffer, 0, 56); + } + + ctx->bits += ctx->len * 8; + ctx->buffer[63] = (uint8_t) ((ctx->bits) & 0xff); + ctx->buffer[62] = (uint8_t) ((ctx->bits >> 8) & 0xff); + ctx->buffer[61] = (uint8_t) ((ctx->bits >> 16) & 0xff); + ctx->buffer[60] = (uint8_t) ((ctx->bits >> 24) & 0xff); + ctx->buffer[59] = (uint8_t) ((ctx->bits >> 32) & 0xff); + ctx->buffer[58] = (uint8_t) ((ctx->bits >> 40) & 0xff); + ctx->buffer[57] = (uint8_t) 
((ctx->bits >> 48) & 0xff); + ctx->buffer[56] = (uint8_t) ((ctx->bits >> 56) & 0xff); + mg_sha256_chunk(ctx); + + for (i = 0; i < 4; ++i) { + digest[i] = (ctx->state[0] >> (24 - i * 8)) & 0xff; + digest[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0xff; + digest[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0xff; + digest[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0xff; + digest[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0xff; + digest[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0xff; + digest[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0xff; + digest[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0xff; + } +} + +void mg_hmac_sha256(uint8_t dst[32], uint8_t *key, size_t keysz, uint8_t *data, + size_t datasz) { + mg_sha256_ctx ctx; + uint8_t k[64] = {0}; + uint8_t o_pad[64], i_pad[64]; + unsigned int i; + memset(i_pad, 0x36, sizeof(i_pad)); + memset(o_pad, 0x5c, sizeof(o_pad)); + if (keysz < 64) { + memmove(k, key, keysz); + } else { + mg_sha256_init(&ctx); + mg_sha256_update(&ctx, key, keysz); + mg_sha256_final(k, &ctx); + } + for (i = 0; i < sizeof(k); i++) { + i_pad[i] ^= k[i]; + o_pad[i] ^= k[i]; + } + mg_sha256_init(&ctx); + mg_sha256_update(&ctx, i_pad, sizeof(i_pad)); + mg_sha256_update(&ctx, data, datasz); + mg_sha256_final(dst, &ctx); + mg_sha256_init(&ctx); + mg_sha256_update(&ctx, o_pad, sizeof(o_pad)); + mg_sha256_update(&ctx, dst, 32); + mg_sha256_final(dst, &ctx); +} + + #ifdef MG_ENABLE_LINES #line 1 "src/sntp.c" #endif @@ -7477,170 +7646,1930 @@ void mg_timer_poll(struct mg_timer **head, uint64_t now_ms) { } } +#ifdef MG_ENABLE_LINES +#line 1 "src/tls_aes128.c" +#endif +/****************************************************************************** + * + * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL + * + * This is a simple and straightforward implementation of the AES Rijndael + * 128-bit block cipher designed by Vincent Rijmen and Joan Daemen. The focus + * of this work was correctness & accuracy. 
It is written in 'C' without any + * particular focus upon optimization or speed. It should be endian (memory + * byte order) neutral since the few places that care are handled explicitly. + * + * This implementation of Rijndael was created by Steven M. Gibson of GRC.com. + * + * It is intended for general purpose use, but was written in support of GRC's + * reference implementation of the SQRL (Secure Quick Reliable Login) client. + * + * See: http://csrc.nist.gov/archive/aes/rijndael/wsdindex.html + * + * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE + * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK. + * + *******************************************************************************/ + + + + +#if MG_TLS == MG_TLS_BUILTIN +static int aes_tables_inited = 0; // run-once flag for performing key + // expasion table generation (see below) +/* + * The following static local tables must be filled-in before the first use of + * the GCM or AES ciphers. They are used for the AES key expansion/scheduling + * and once built are read-only and thread safe. The "gcm_initialize" function + * must be called once during system initialization to populate these arrays + * for subsequent use by the AES key scheduler. If they have not been built + * before attempted use, an error will be returned to the caller. + * + * NOTE: GCM Encryption/Decryption does NOT REQUIRE AES decryption. Since + * GCM uses AES in counter-mode, where the AES cipher output is XORed with + * the GCM input, we ONLY NEED AES encryption. Thus, to save space AES + * decryption is typically disabled by setting AES_DECRYPTION to 0 in aes.h. 
+ */ +// We always need our forward tables +static uchar FSb[256]; // Forward substitution box (FSb) +static uint32_t FT0[256]; // Forward key schedule assembly tables +static uint32_t FT1[256]; +static uint32_t FT2[256]; +static uint32_t FT3[256]; + +#if AES_DECRYPTION // We ONLY need reverse for decryption +static uchar RSb[256]; // Reverse substitution box (RSb) +static uint32_t RT0[256]; // Reverse key schedule assembly tables +static uint32_t RT1[256]; +static uint32_t RT2[256]; +static uint32_t RT3[256]; +#endif /* AES_DECRYPTION */ + +static uint32_t RCON[10]; // AES round constants + +/* + * Platform Endianness Neutralizing Load and Store Macro definitions + * AES wants platform-neutral Little Endian (LE) byte ordering + */ +#define GET_UINT32_LE(n, b, i) \ + { \ + (n) = ((uint32_t) (b)[(i)]) | ((uint32_t) (b)[(i) + 1] << 8) | \ + ((uint32_t) (b)[(i) + 2] << 16) | ((uint32_t) (b)[(i) + 3] << 24); \ + } + +#define PUT_UINT32_LE(n, b, i) \ + { \ + (b)[(i)] = (uchar) ((n)); \ + (b)[(i) + 1] = (uchar) ((n) >> 8); \ + (b)[(i) + 2] = (uchar) ((n) >> 16); \ + (b)[(i) + 3] = (uchar) ((n) >> 24); \ + } + +/* + * AES forward and reverse encryption round processing macros + */ +#define AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \ + { \ + X0 = *RK++ ^ FT0[(Y0) &0xFF] ^ FT1[(Y1 >> 8) & 0xFF] ^ \ + FT2[(Y2 >> 16) & 0xFF] ^ FT3[(Y3 >> 24) & 0xFF]; \ + \ + X1 = *RK++ ^ FT0[(Y1) &0xFF] ^ FT1[(Y2 >> 8) & 0xFF] ^ \ + FT2[(Y3 >> 16) & 0xFF] ^ FT3[(Y0 >> 24) & 0xFF]; \ + \ + X2 = *RK++ ^ FT0[(Y2) &0xFF] ^ FT1[(Y3 >> 8) & 0xFF] ^ \ + FT2[(Y0 >> 16) & 0xFF] ^ FT3[(Y1 >> 24) & 0xFF]; \ + \ + X3 = *RK++ ^ FT0[(Y3) &0xFF] ^ FT1[(Y0 >> 8) & 0xFF] ^ \ + FT2[(Y1 >> 16) & 0xFF] ^ FT3[(Y2 >> 24) & 0xFF]; \ + } + +#define AES_RROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \ + { \ + X0 = *RK++ ^ RT0[(Y0) &0xFF] ^ RT1[(Y3 >> 8) & 0xFF] ^ \ + RT2[(Y2 >> 16) & 0xFF] ^ RT3[(Y1 >> 24) & 0xFF]; \ + \ + X1 = *RK++ ^ RT0[(Y1) &0xFF] ^ RT1[(Y0 >> 8) & 0xFF] ^ \ + RT2[(Y3 >> 16) & 0xFF] ^ RT3[(Y2 >> 24) & 
0xFF]; \ + \ + X2 = *RK++ ^ RT0[(Y2) &0xFF] ^ RT1[(Y1 >> 8) & 0xFF] ^ \ + RT2[(Y0 >> 16) & 0xFF] ^ RT3[(Y3 >> 24) & 0xFF]; \ + \ + X3 = *RK++ ^ RT0[(Y3) &0xFF] ^ RT1[(Y2 >> 8) & 0xFF] ^ \ + RT2[(Y1 >> 16) & 0xFF] ^ RT3[(Y0 >> 24) & 0xFF]; \ + } + +/* + * These macros improve the readability of the key + * generation initialization code by collapsing + * repetitive common operations into logical pieces. + */ +#define ROTL8(x) ((x << 8) & 0xFFFFFFFF) | (x >> 24) +#define XTIME(x) ((x << 1) ^ ((x & 0x80) ? 0x1B : 0x00)) +#define MUL(x, y) ((x && y) ? pow[(log[x] + log[y]) % 255] : 0) +#define MIX(x, y) \ + { \ + y = ((y << 1) | (y >> 7)) & 0xFF; \ + x ^= y; \ + } +#define CPY128 \ + { \ + *RK++ = *SK++; \ + *RK++ = *SK++; \ + *RK++ = *SK++; \ + *RK++ = *SK++; \ + } + +/****************************************************************************** + * + * AES_INIT_KEYGEN_TABLES + * + * Fills the AES key expansion tables allocated above with their static + * data. This is not "per key" data, but static system-wide read-only + * table data. THIS FUNCTION IS NOT THREAD SAFE. It must be called once + * at system initialization to setup the tables for all subsequent use. 
+ * + ******************************************************************************/ +void aes_init_keygen_tables(void) { + int i, x, y, z; // general purpose iteration and computation locals + int pow[256]; + int log[256]; + + if (aes_tables_inited) return; + + // fill the 'pow' and 'log' tables over GF(2^8) + for (i = 0, x = 1; i < 256; i++) { + pow[i] = x; + log[x] = i; + x = (x ^ XTIME(x)) & 0xFF; + } + // compute the round constants + for (i = 0, x = 1; i < 10; i++) { + RCON[i] = (uint32_t) x; + x = XTIME(x) & 0xFF; + } + // fill the forward and reverse substitution boxes + FSb[0x00] = 0x63; +#if AES_DECRYPTION // whether AES decryption is supported + RSb[0x63] = 0x00; +#endif /* AES_DECRYPTION */ + + for (i = 1; i < 256; i++) { + x = y = pow[255 - log[i]]; + MIX(x, y); + MIX(x, y); + MIX(x, y); + MIX(x, y); + FSb[i] = (uchar) (x ^= 0x63); +#if AES_DECRYPTION // whether AES decryption is supported + RSb[x] = (uchar) i; +#endif /* AES_DECRYPTION */ + } + // generate the forward and reverse key expansion tables + for (i = 0; i < 256; i++) { + x = FSb[i]; + y = XTIME(x) & 0xFF; + z = (y ^ x) & 0xFF; + + FT0[i] = ((uint32_t) y) ^ ((uint32_t) x << 8) ^ ((uint32_t) x << 16) ^ + ((uint32_t) z << 24); + + FT1[i] = ROTL8(FT0[i]); + FT2[i] = ROTL8(FT1[i]); + FT3[i] = ROTL8(FT2[i]); + +#if AES_DECRYPTION // whether AES decryption is supported + x = RSb[i]; + + RT0[i] = ((uint32_t) MUL(0x0E, x)) ^ ((uint32_t) MUL(0x09, x) << 8) ^ + ((uint32_t) MUL(0x0D, x) << 16) ^ ((uint32_t) MUL(0x0B, x) << 24); + + RT1[i] = ROTL8(RT0[i]); + RT2[i] = ROTL8(RT1[i]); + RT3[i] = ROTL8(RT2[i]); +#endif /* AES_DECRYPTION */ + } + aes_tables_inited = 1; // flag that the tables have been generated +} // to permit subsequent use of the AES cipher + +/****************************************************************************** + * + * AES_SET_ENCRYPTION_KEY + * + * This is called by 'aes_setkey' when we're establishing a key for + * subsequent encryption. 
We give it a pointer to the encryption + * context, a pointer to the key, and the key's length in bytes. + * Valid lengths are: 16, 24 or 32 bytes (128, 192, 256 bits). + * + ******************************************************************************/ +static int aes_set_encryption_key(aes_context *ctx, const uchar *key, uint keysize) { + uint i; // general purpose iteration local + uint32_t *RK = ctx->rk; // initialize our RoundKey buffer pointer + + for (i = 0; i < (keysize >> 2); i++) { + GET_UINT32_LE(RK[i], key, i << 2); + } + + switch (ctx->rounds) { + case 10: + for (i = 0; i < 10; i++, RK += 4) { + RK[4] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[3] >> 8) & 0xFF]) ^ + ((uint32_t) FSb[(RK[3] >> 16) & 0xFF] << 8) ^ + ((uint32_t) FSb[(RK[3] >> 24) & 0xFF] << 16) ^ + ((uint32_t) FSb[(RK[3]) & 0xFF] << 24); + + RK[5] = RK[1] ^ RK[4]; + RK[6] = RK[2] ^ RK[5]; + RK[7] = RK[3] ^ RK[6]; + } + break; + + case 12: + for (i = 0; i < 8; i++, RK += 6) { + RK[6] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[5] >> 8) & 0xFF]) ^ + ((uint32_t) FSb[(RK[5] >> 16) & 0xFF] << 8) ^ + ((uint32_t) FSb[(RK[5] >> 24) & 0xFF] << 16) ^ + ((uint32_t) FSb[(RK[5]) & 0xFF] << 24); + + RK[7] = RK[1] ^ RK[6]; + RK[8] = RK[2] ^ RK[7]; + RK[9] = RK[3] ^ RK[8]; + RK[10] = RK[4] ^ RK[9]; + RK[11] = RK[5] ^ RK[10]; + } + break; + + case 14: + for (i = 0; i < 7; i++, RK += 8) { + RK[8] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[7] >> 8) & 0xFF]) ^ + ((uint32_t) FSb[(RK[7] >> 16) & 0xFF] << 8) ^ + ((uint32_t) FSb[(RK[7] >> 24) & 0xFF] << 16) ^ + ((uint32_t) FSb[(RK[7]) & 0xFF] << 24); + + RK[9] = RK[1] ^ RK[8]; + RK[10] = RK[2] ^ RK[9]; + RK[11] = RK[3] ^ RK[10]; + + RK[12] = RK[4] ^ ((uint32_t) FSb[(RK[11]) & 0xFF]) ^ + ((uint32_t) FSb[(RK[11] >> 8) & 0xFF] << 8) ^ + ((uint32_t) FSb[(RK[11] >> 16) & 0xFF] << 16) ^ + ((uint32_t) FSb[(RK[11] >> 24) & 0xFF] << 24); + + RK[13] = RK[5] ^ RK[12]; + RK[14] = RK[6] ^ RK[13]; + RK[15] = RK[7] ^ RK[14]; + } + break; + + default: + return -1; + } + return (0); +} + 
+#if AES_DECRYPTION // whether AES decryption is supported + +/****************************************************************************** + * + * AES_SET_DECRYPTION_KEY + * + * This is called by 'aes_setkey' when we're establishing a + * key for subsequent decryption. We give it a pointer to + * the encryption context, a pointer to the key, and the key's + * length in bits. Valid lengths are: 128, 192, or 256 bits. + * + ******************************************************************************/ +static int aes_set_decryption_key(aes_context *ctx, const uchar *key, uint keysize) { + int i, j; + aes_context cty; // a calling aes context for set_encryption_key + uint32_t *RK = ctx->rk; // initialize our RoundKey buffer pointer + uint32_t *SK; + int ret; + + cty.rounds = ctx->rounds; // initialize our local aes context + cty.rk = cty.buf; // round count and key buf pointer + + if ((ret = aes_set_encryption_key(&cty, key, keysize)) != 0) return (ret); + + SK = cty.rk + cty.rounds * 4; + + CPY128 // copy a 128-bit block from *SK to *RK + + for (i = ctx->rounds - 1, SK -= 8; i > 0; i--, SK -= 8) { + for (j = 0; j < 4; j++, SK++) { + *RK++ = RT0[FSb[(*SK) & 0xFF]] ^ RT1[FSb[(*SK >> 8) & 0xFF]] ^ + RT2[FSb[(*SK >> 16) & 0xFF]] ^ RT3[FSb[(*SK >> 24) & 0xFF]]; + } + } + CPY128 // copy a 128-bit block from *SK to *RK + memset(&cty, 0, sizeof(aes_context)); // clear local aes context + return (0); +} + +#endif /* AES_DECRYPTION */ + +/****************************************************************************** + * + * AES_SETKEY + * + * Invoked to establish the key schedule for subsequent encryption/decryption + * + ******************************************************************************/ +int aes_setkey(aes_context *ctx, // AES context provided by our caller + int mode, // ENCRYPT or DECRYPT flag + const uchar *key, // pointer to the key + uint keysize) // key length in bytes +{ + // since table initialization is not thread safe, we could either add + // 
system-specific mutexes and init the AES key generation tables on + // demand, or ask the developer to simply call "gcm_initialize" once during + // application startup before threading begins. That's what we choose. + if (!aes_tables_inited) return (-1); // fail the call when not inited. + + ctx->mode = mode; // capture the key type we're creating + ctx->rk = ctx->buf; // initialize our round key pointer + + switch (keysize) // set the rounds count based upon the keysize + { + case 16: + ctx->rounds = 10; + break; // 16-byte, 128-bit key + case 24: + ctx->rounds = 12; + break; // 24-byte, 192-bit key + case 32: + ctx->rounds = 14; + break; // 32-byte, 256-bit key + default: + return (-1); + } + +#if AES_DECRYPTION + if (mode == DECRYPT) // expand our key for encryption or decryption + return (aes_set_decryption_key(ctx, key, keysize)); + else /* ENCRYPT */ +#endif /* AES_DECRYPTION */ + return (aes_set_encryption_key(ctx, key, keysize)); +} + +/****************************************************************************** + * + * AES_CIPHER + * + * Perform AES encryption and decryption. + * The AES context will have been setup with the encryption mode + * and all keying information appropriate for the task. 
+ * + ******************************************************************************/ +int aes_cipher(aes_context *ctx, const uchar input[16], uchar output[16]) { + int i; + uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; // general purpose locals + + RK = ctx->rk; + + GET_UINT32_LE(X0, input, 0); + X0 ^= *RK++; // load our 128-bit + GET_UINT32_LE(X1, input, 4); + X1 ^= *RK++; // input buffer in a storage + GET_UINT32_LE(X2, input, 8); + X2 ^= *RK++; // memory endian-neutral way + GET_UINT32_LE(X3, input, 12); + X3 ^= *RK++; + +#if AES_DECRYPTION // whether AES decryption is supported + + if (ctx->mode == DECRYPT) { + for (i = (ctx->rounds >> 1) - 1; i > 0; i--) { + AES_RROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); + AES_RROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); + } + + AES_RROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); + + X0 = *RK++ ^ ((uint32_t) RSb[(Y0) &0xFF]) ^ + ((uint32_t) RSb[(Y3 >> 8) & 0xFF] << 8) ^ + ((uint32_t) RSb[(Y2 >> 16) & 0xFF] << 16) ^ + ((uint32_t) RSb[(Y1 >> 24) & 0xFF] << 24); + + X1 = *RK++ ^ ((uint32_t) RSb[(Y1) &0xFF]) ^ + ((uint32_t) RSb[(Y0 >> 8) & 0xFF] << 8) ^ + ((uint32_t) RSb[(Y3 >> 16) & 0xFF] << 16) ^ + ((uint32_t) RSb[(Y2 >> 24) & 0xFF] << 24); + + X2 = *RK++ ^ ((uint32_t) RSb[(Y2) &0xFF]) ^ + ((uint32_t) RSb[(Y1 >> 8) & 0xFF] << 8) ^ + ((uint32_t) RSb[(Y0 >> 16) & 0xFF] << 16) ^ + ((uint32_t) RSb[(Y3 >> 24) & 0xFF] << 24); + + X3 = *RK++ ^ ((uint32_t) RSb[(Y3) &0xFF]) ^ + ((uint32_t) RSb[(Y2 >> 8) & 0xFF] << 8) ^ + ((uint32_t) RSb[(Y1 >> 16) & 0xFF] << 16) ^ + ((uint32_t) RSb[(Y0 >> 24) & 0xFF] << 24); + } else /* ENCRYPT */ + { +#endif /* AES_DECRYPTION */ + + for (i = (ctx->rounds >> 1) - 1; i > 0; i--) { + AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); + AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3); + } + + AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3); + + X0 = *RK++ ^ ((uint32_t) FSb[(Y0) &0xFF]) ^ + ((uint32_t) FSb[(Y1 >> 8) & 0xFF] << 8) ^ + ((uint32_t) FSb[(Y2 >> 16) & 0xFF] << 16) ^ + ((uint32_t) FSb[(Y3 >> 24) & 0xFF] << 24); + + X1 = *RK++ ^ 
((uint32_t) FSb[(Y1) &0xFF]) ^ + ((uint32_t) FSb[(Y2 >> 8) & 0xFF] << 8) ^ + ((uint32_t) FSb[(Y3 >> 16) & 0xFF] << 16) ^ + ((uint32_t) FSb[(Y0 >> 24) & 0xFF] << 24); + + X2 = *RK++ ^ ((uint32_t) FSb[(Y2) &0xFF]) ^ + ((uint32_t) FSb[(Y3 >> 8) & 0xFF] << 8) ^ + ((uint32_t) FSb[(Y0 >> 16) & 0xFF] << 16) ^ + ((uint32_t) FSb[(Y1 >> 24) & 0xFF] << 24); + + X3 = *RK++ ^ ((uint32_t) FSb[(Y3) &0xFF]) ^ + ((uint32_t) FSb[(Y0 >> 8) & 0xFF] << 8) ^ + ((uint32_t) FSb[(Y1 >> 16) & 0xFF] << 16) ^ + ((uint32_t) FSb[(Y2 >> 24) & 0xFF] << 24); + +#if AES_DECRYPTION // whether AES decryption is supported + } +#endif /* AES_DECRYPTION */ + + PUT_UINT32_LE(X0, output, 0); + PUT_UINT32_LE(X1, output, 4); + PUT_UINT32_LE(X2, output, 8); + PUT_UINT32_LE(X3, output, 12); + + return (0); +} +/* end of aes.c */ +/****************************************************************************** + * + * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL + * + * This is a simple and straightforward implementation of AES-GCM authenticated + * encryption. The focus of this work was correctness & accuracy. It is written + * in straight 'C' without any particular focus upon optimization or speed. It + * should be endian (memory byte order) neutral since the few places that care + * are handled explicitly. + * + * This implementation of AES-GCM was created by Steven M. Gibson of GRC.com. + * + * It is intended for general purpose use, but was written in support of GRC's + * reference implementation of the SQRL (Secure Quick Reliable Login) client. + * + * See: http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf + * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ + * gcm/gcm-revised-spec.pdf + * + * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE + * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK. 
+ * + *******************************************************************************/ + + +/****************************************************************************** + * ==== IMPLEMENTATION WARNING ==== + * + * This code was developed for use within SQRL's fixed environmnent. Thus, it + * is somewhat less "general purpose" than it would be if it were designed as + * a general purpose AES-GCM library. Specifically, it bothers with almost NO + * error checking on parameter limits, buffer bounds, etc. It assumes that it + * is being invoked by its author or by someone who understands the values it + * expects to receive. Its behavior will be undefined otherwise. + * + * All functions that might fail are defined to return 'ints' to indicate a + * problem. Most do not do so now. But this allows for error propagation out + * of internal functions if robust error checking should ever be desired. + * + ******************************************************************************/ + +/* Calculating the "GHASH" + * + * There are many ways of calculating the so-called GHASH in software, each with + * a traditional size vs performance tradeoff. The GHASH (Galois field hash) is + * an intriguing construction which takes two 128-bit strings (also the cipher's + * block size and the fundamental operation size for the system) and hashes them + * into a third 128-bit result. + * + * Many implementation solutions have been worked out that use large precomputed + * table lookups in place of more time consuming bit fiddling, and this approach + * can be scaled easily upward or downward as needed to change the time/space + * tradeoff. It's been studied extensively and there's a solid body of theory + * and practice. For example, without using any lookup tables an implementation + * might obtain 119 cycles per byte throughput, whereas using a simple, though + * large, key-specific 64 kbyte 8-bit lookup table the performance jumps to 13 + * cycles per byte. 
+ * + * And Intel's processors have, since 2010, included an instruction which does + * the entire 128x128->128 bit job in just several 64x64->128 bit pieces. + * + * Since SQRL is interactive, and only processing a few 128-bit blocks, I've + * settled upon a relatively slower but appealing small-table compromise which + * folds a bunch of not only time consuming but also bit twiddling into a simple + * 16-entry table which is attributed to Victor Shoup's 1996 work while at + * Bellcore: "On Fast and Provably Secure MessageAuthentication Based on + * Universal Hashing." See: http://www.shoup.net/papers/macs.pdf + * See, also section 4.1 of the "gcm-revised-spec" cited above. + */ + +/* + * This 16-entry table of pre-computed constants is used by the + * GHASH multiplier to improve over a strictly table-free but + * significantly slower 128x128 bit multiple within GF(2^128). + */ +static const uint64_t last4[16] = { + 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0, + 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0}; + +/* + * Platform Endianness Neutralizing Load and Store Macro definitions + * GCM wants platform-neutral Big Endian (BE) byte ordering + */ +#define GET_UINT32_BE(n, b, i) \ + { \ + (n) = ((uint32_t) (b)[(i)] << 24) | ((uint32_t) (b)[(i) + 1] << 16) | \ + ((uint32_t) (b)[(i) + 2] << 8) | ((uint32_t) (b)[(i) + 3]); \ + } + +#define PUT_UINT32_BE(n, b, i) \ + { \ + (b)[(i)] = (uchar) ((n) >> 24); \ + (b)[(i) + 1] = (uchar) ((n) >> 16); \ + (b)[(i) + 2] = (uchar) ((n) >> 8); \ + (b)[(i) + 3] = (uchar) ((n)); \ + } + +/****************************************************************************** + * + * GCM_INITIALIZE + * + * Must be called once to initialize the GCM library. + * + * At present, this only calls the AES keygen table generator, which expands + * the AES keying tables for use. 
This is NOT A THREAD-SAFE function, so it + * MUST be called during system initialization before a multi-threading + * environment is running. + * + ******************************************************************************/ +int gcm_initialize(void) { + aes_init_keygen_tables(); + return (0); +} + +/****************************************************************************** + * + * GCM_MULT + * + * Performs a GHASH operation on the 128-bit input vector 'x', setting + * the 128-bit output vector to 'x' times H using our precomputed tables. + * 'x' and 'output' are seen as elements of GCM's GF(2^128) Galois field. + * + ******************************************************************************/ +static void gcm_mult(gcm_context *ctx, // pointer to established context + const uchar x[16], // pointer to 128-bit input vector + uchar output[16]) // pointer to 128-bit output vector +{ + int i; + uchar lo, hi, rem; + uint64_t zh, zl; + + lo = (uchar) (x[15] & 0x0f); + hi = (uchar) (x[15] >> 4); + zh = ctx->HH[lo]; + zl = ctx->HL[lo]; + + for (i = 15; i >= 0; i--) { + lo = (uchar) (x[i] & 0x0f); + hi = (uchar) (x[i] >> 4); + + if (i != 15) { + rem = (uchar) (zl & 0x0f); + zl = (zh << 60) | (zl >> 4); + zh = (zh >> 4); + zh ^= (uint64_t) last4[rem] << 48; + zh ^= ctx->HH[lo]; + zl ^= ctx->HL[lo]; + } + rem = (uchar) (zl & 0x0f); + zl = (zh << 60) | (zl >> 4); + zh = (zh >> 4); + zh ^= (uint64_t) last4[rem] << 48; + zh ^= ctx->HH[hi]; + zl ^= ctx->HL[hi]; + } + PUT_UINT32_BE(zh >> 32, output, 0); + PUT_UINT32_BE(zh, output, 4); + PUT_UINT32_BE(zl >> 32, output, 8); + PUT_UINT32_BE(zl, output, 12); +} + +/****************************************************************************** + * + * GCM_SETKEY + * + * This is called to set the AES-GCM key. It initializes the AES key + * and populates the gcm context's pre-calculated HTables. 
+ * + ******************************************************************************/ +int gcm_setkey(gcm_context *ctx, // pointer to caller-provided gcm context + const uchar *key, // pointer to the AES encryption key + const uint keysize) // size in bytes (must be 16, 24, 32 for + // 128, 192 or 256-bit keys respectively) +{ + int ret, i, j; + uint64_t hi, lo; + uint64_t vl, vh; + unsigned char h[16]; + + memset(ctx, 0, sizeof(gcm_context)); // zero caller-provided GCM context + memset(h, 0, 16); // initialize the block to encrypt + + // encrypt the null 128-bit block to generate a key-based value + // which is then used to initialize our GHASH lookup tables + if ((ret = aes_setkey(&ctx->aes_ctx, ENCRYPT, key, keysize)) != 0) + return (ret); + if ((ret = aes_cipher(&ctx->aes_ctx, h, h)) != 0) return (ret); + + GET_UINT32_BE(hi, h, 0); // pack h as two 64-bit ints, big-endian + GET_UINT32_BE(lo, h, 4); + vh = (uint64_t) hi << 32 | lo; + + GET_UINT32_BE(hi, h, 8); + GET_UINT32_BE(lo, h, 12); + vl = (uint64_t) hi << 32 | lo; + + ctx->HL[8] = vl; // 8 = 1000 corresponds to 1 in GF(2^128) + ctx->HH[8] = vh; + ctx->HH[0] = 0; // 0 corresponds to 0 in GF(2^128) + ctx->HL[0] = 0; + + for (i = 4; i > 0; i >>= 1) { + uint32_t T = (uint32_t) (vl & 1) * 0xe1000000U; + vl = (vh << 63) | (vl >> 1); + vh = (vh >> 1) ^ ((uint64_t) T << 32); + ctx->HL[i] = vl; + ctx->HH[i] = vh; + } + for (i = 2; i < 16; i <<= 1) { + uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i; + vh = *HiH; + vl = *HiL; + for (j = 1; j < i; j++) { + HiH[j] = vh ^ ctx->HH[j]; + HiL[j] = vl ^ ctx->HL[j]; + } + } + return (0); +} + +/****************************************************************************** + * + * GCM processing occurs four phases: SETKEY, START, UPDATE and FINISH. + * + * SETKEY: + * + * START: Sets the Encryption/Decryption mode. + * Accepts the initialization vector and additional data. + * + * UPDATE: Encrypts or decrypts the plaintext or ciphertext. 
+ * + * FINISH: Performs a final GHASH to generate the authentication tag. + * + ****************************************************************************** + * + * GCM_START + * + * Given a user-provided GCM context, this initializes it, sets the encryption + * mode, and preprocesses the initialization vector and additional AEAD data. + * + ******************************************************************************/ +int gcm_start(gcm_context *ctx, // pointer to user-provided GCM context + int mode, // GCM_ENCRYPT or GCM_DECRYPT + const uchar *iv, // pointer to initialization vector + size_t iv_len, // IV length in bytes (should == 12) + const uchar *add, // ptr to additional AEAD data (NULL if none) + size_t add_len) // length of additional AEAD data (bytes) +{ + int ret; // our error return if the AES encrypt fails + uchar work_buf[16]; // XOR source built from provided IV if len != 16 + const uchar *p; // general purpose array pointer + size_t use_len; // byte count to process, up to 16 bytes + size_t i; // local loop iterator + + // since the context might be reused under the same key + // we zero the working buffers for this next new process + memset(ctx->y, 0x00, sizeof(ctx->y)); + memset(ctx->buf, 0x00, sizeof(ctx->buf)); + ctx->len = 0; + ctx->add_len = 0; + + ctx->mode = mode; // set the GCM encryption/decryption mode + ctx->aes_ctx.mode = ENCRYPT; // GCM *always* runs AES in ENCRYPTION mode + + if (iv_len == 12) { // GCM natively uses a 12-byte, 96-bit IV + memcpy(ctx->y, iv, iv_len); // copy the IV to the top of the 'y' buff + ctx->y[15] = 1; // start "counting" from 1 (not 0) + } else // if we don't have a 12-byte IV, we GHASH whatever we've been given + { + memset(work_buf, 0x00, 16); // clear the working buffer + PUT_UINT32_BE(iv_len * 8, work_buf, 12); // place the IV into buffer + + p = iv; + while (iv_len > 0) { + use_len = (iv_len < 16) ? 
iv_len : 16; + for (i = 0; i < use_len; i++) ctx->y[i] ^= p[i]; + gcm_mult(ctx, ctx->y, ctx->y); + iv_len -= use_len; + p += use_len; + } + for (i = 0; i < 16; i++) ctx->y[i] ^= work_buf[i]; + gcm_mult(ctx, ctx->y, ctx->y); + } + if ((ret = aes_cipher(&ctx->aes_ctx, ctx->y, ctx->base_ectr)) != 0) + return (ret); + + ctx->add_len = add_len; + p = add; + while (add_len > 0) { + use_len = (add_len < 16) ? add_len : 16; + for (i = 0; i < use_len; i++) ctx->buf[i] ^= p[i]; + gcm_mult(ctx, ctx->buf, ctx->buf); + add_len -= use_len; + p += use_len; + } + return (0); +} + +/****************************************************************************** + * + * GCM_UPDATE + * + * This is called once or more to process bulk plaintext or ciphertext data. + * We give this some number of bytes of input and it returns the same number + * of output bytes. If called multiple times (which is fine) all but the final + * invocation MUST be called with length mod 16 == 0. (Only the final call can + * have a partial block length of < 128 bits.) + * + ******************************************************************************/ +int gcm_update(gcm_context *ctx, // pointer to user-provided GCM context + size_t length, // length, in bytes, of data to process + const uchar *input, // pointer to source data + uchar *output) // pointer to destination data +{ + int ret; // our error return if the AES encrypt fails + uchar ectr[16]; // counter-mode cipher output for XORing + size_t use_len; // byte count to process, up to 16 bytes + size_t i; // local loop iterator + + ctx->len += length; // bump the GCM context's running length count + + while (length > 0) { + // clamp the length to process at 16 bytes + use_len = (length < 16) ? 
length : 16; + + // increment the context's 128-bit IV||Counter 'y' vector + for (i = 16; i > 12; i--) + if (++ctx->y[i - 1] != 0) break; + + // encrypt the context's 'y' vector under the established key + if ((ret = aes_cipher(&ctx->aes_ctx, ctx->y, ectr)) != 0) return (ret); + + // encrypt or decrypt the input to the output + if (ctx->mode == ENCRYPT) { + for (i = 0; i < use_len; i++) { + // XOR the cipher's ouptut vector (ectr) with our input + output[i] = (uchar) (ectr[i] ^ input[i]); + // now we mix in our data into the authentication hash. + // if we're ENcrypting we XOR in the post-XOR (output) + // results, but if we're DEcrypting we XOR in the input + // data + ctx->buf[i] ^= output[i]; + } + } else { + for (i = 0; i < use_len; i++) { + // but if we're DEcrypting we XOR in the input data first, + // i.e. before saving to ouput data, otherwise if the input + // and output buffer are the same (inplace decryption) we + // would not get the correct auth tag + + ctx->buf[i] ^= input[i]; + + // XOR the cipher's ouptut vector (ectr) with our input + output[i] = (uchar) (ectr[i] ^ input[i]); + } + } + gcm_mult(ctx, ctx->buf, ctx->buf); // perform a GHASH operation + + length -= use_len; // drop the remaining byte count to process + input += use_len; // bump our input pointer forward + output += use_len; // bump our output pointer forward + } + return (0); +} + +/****************************************************************************** + * + * GCM_FINISH + * + * This is called once after all calls to GCM_UPDATE to finalize the GCM. + * It performs the final GHASH to produce the resulting authentication TAG. 
+ *
+ ******************************************************************************/
+int gcm_finish(gcm_context *ctx,  // pointer to user-provided GCM context
+               uchar *tag,        // pointer to buffer which receives the tag
+               size_t tag_len)    // length, in bytes, of the tag-receiving buf
+{
+  uchar work_buf[16];
+  // Byte counts are turned into bit counts for the GHASH length block. Widen
+  // to 64 bits BEFORE multiplying so the product cannot wrap in a narrower
+  // native type.
+  uint64_t orig_len = (uint64_t) ctx->len * 8;
+  uint64_t orig_add_len = (uint64_t) ctx->add_len * 8;
+  size_t i;
+
+  // The tag starts as the encrypted initial counter block saved by gcm_start.
+  // NOTE(review): tag_len is trusted here; callers must keep it <= 16, the
+  // size of the blocks being copied from.
+  if (tag_len != 0) memcpy(tag, ctx->base_ectr, tag_len);
+
+  // With neither payload nor associated data there is nothing to hash in and
+  // the tag stays as copied above.
+  if (orig_len || orig_add_len) {
+    memset(work_buf, 0x00, 16);
+
+    // length block: 64-bit big-endian AAD bit count, then data bit count
+    PUT_UINT32_BE((orig_add_len >> 32), work_buf, 0);
+    PUT_UINT32_BE((orig_add_len), work_buf, 4);
+    PUT_UINT32_BE((orig_len >> 32), work_buf, 8);
+    PUT_UINT32_BE((orig_len), work_buf, 12);
+
+    for (i = 0; i < 16; i++) ctx->buf[i] ^= work_buf[i];
+    gcm_mult(ctx, ctx->buf, ctx->buf);
+    for (i = 0; i < tag_len; i++) tag[i] ^= ctx->buf[i];
+  }
+  return (0);  // this step has no failure path
+}
+
+/******************************************************************************
+ *
+ *  GCM_CRYPT_AND_TAG
+ *
+ *  This either encrypts or decrypts the user-provided data and, either
+ *  way, generates an authentication tag of the requested length. It must be
+ *  called with a GCM context whose key has already been set with GCM_SETKEY.
+ *
+ *  The user would typically call this explicitly to ENCRYPT a buffer of data
+ *  and optional associated data, and produce an authentication tag.
+ *
+ *  To reverse the process the user would typically call the companion
+ *  GCM_AUTH_DECRYPT function to decrypt data and verify a user-provided
+ *  authentication tag. The GCM_AUTH_DECRYPT function calls this function
+ *  to perform its decryption and tag generation, which it then compares.
+ *
+ *  Returns 0 on success, or the first non-zero result reported by
+ *  gcm_start, gcm_update or gcm_finish.
+ *
+ ******************************************************************************/
+int gcm_crypt_and_tag(
+    gcm_context *ctx,    // gcm context with key already setup
+    int mode,            // cipher direction: GCM_ENCRYPT or GCM_DECRYPT
+    const uchar *iv,     // pointer to the 12-byte initialization vector
+    size_t iv_len,       // byte length of the IV. should always be 12
+    const uchar *add,    // pointer to the non-ciphered additional data
+    size_t add_len,      // byte length of the additional AEAD data
+    const uchar *input,  // pointer to the cipher data source
+    uchar *output,       // pointer to the cipher data destination
+    size_t length,       // byte length of the cipher data
+    uchar *tag,          // pointer to the tag to be generated
+    size_t tag_len)      // byte length of the tag to be generated
+{
+  int ret;
+  /*
+     assuming that the caller has already invoked gcm_setkey to
+     prepare the gcm context with the keying material, we simply
+     invoke each of the three GCM sub-functions in turn, stopping at
+     the first one that reports a failure so that a failed AES step is
+     never reported as success
+  */
+  if ((ret = gcm_start(ctx, mode, iv, iv_len, add, add_len)) != 0)
+    return (ret);
+  if ((ret = gcm_update(ctx, length, input, output)) != 0) return (ret);
+  return (gcm_finish(ctx, tag, tag_len));
+}
+
+/******************************************************************************
+ *
+ *  GCM_AUTH_DECRYPT
+ *
+ *  This DECRYPTS a user-provided data buffer with optional associated data.
+ *  It then verifies a user-supplied authentication tag against the tag just
+ *  re-created during decryption to verify that the data has not been altered.
+ *
+ *  This function calls GCM_CRYPT_AND_TAG (above) to perform the decryption
+ *  and authentication tag generation.
+ *
+ *  Returns 0 when the tag matches. Returns GCM_AUTH_FAILURE when the tag
+ *  differs or tag_len exceeds 16, or the error of the decryption step; on a
+ *  tag mismatch or decryption error the output buffer is wiped.
+ *
+ ******************************************************************************/
+int gcm_auth_decrypt(
+    gcm_context *ctx,    // gcm context with key already setup
+    const uchar *iv,     // pointer to the 12-byte initialization vector
+    size_t iv_len,       // byte length of the IV. should always be 12
+    const uchar *add,    // pointer to the non-ciphered additional data
+    size_t add_len,      // byte length of the additional AEAD data
+    const uchar *input,  // pointer to the cipher data source
+    uchar *output,       // pointer to the cipher data destination
+    size_t length,       // byte length of the cipher data
+    const uchar *tag,    // pointer to the tag to be authenticated
+    size_t tag_len)      // byte length of the tag <= 16
+{
+  uchar check_tag[16];  // the tag generated and returned by decryption
+  int diff;             // an ORed flag to detect authentication errors
+  int ret;              // result of the decryption step
+  size_t i;             // our local iterator
+
+  // a longer tag cannot exist and would overflow check_tag below
+  if (tag_len > sizeof(check_tag)) return (GCM_AUTH_FAILURE);
+
+  /*
+     we use GCM_CRYPT_AND_TAG (above) to perform our decryption
+     (which is an identical XORing to reverse the previous one)
+     and also to re-generate the matching authentication tag
+  */
+  ret = gcm_crypt_and_tag(ctx, DECRYPT, iv, iv_len, add, add_len, input,
+                          output, length, check_tag, tag_len);
+  if (ret != 0) {
+    memset(output, 0, length);  // never hand out unauthenticated plaintext
+    return (ret);
+  }
+
+  // now we verify the authentication tag in 'constant time'
+  for (diff = 0, i = 0; i < tag_len; i++) diff |= tag[i] ^ check_tag[i];
+
+  if (diff != 0) {              // see whether any bits differed?
+    memset(output, 0, length);  // if so... wipe the output data
+    return (GCM_AUTH_FAILURE);  // return GCM_AUTH_FAILURE
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ *  GCM_ZERO_CTX
+ *
+ *  The GCM context contains both the GCM context and the AES context.
+ *  This includes keying and key-related material which is security-
+ *  sensitive, so it MUST be zeroed after use.  This function does that.
+ *
+ ******************************************************************************/
+void gcm_zero_ctx(gcm_context *ctx) {
+  // zero the context originally provided to us. The stores go through a
+  // volatile pointer so the compiler cannot drop them as dead writes when
+  // the context is a local that goes out of scope right after this call.
+  volatile uchar *p = (volatile uchar *) ctx;
+  size_t n = sizeof(gcm_context);
+  while (n-- > 0) *p++ = 0;
+}
+//
+//  aes-gcm.c
+//  Pods
+//
+//  Created by Markus Kosmal on 20/11/14.
+//
+//
+
+// One-shot AES-GCM encryption: sets up a temporary GCM context from `key`,
+// encrypts `input_length` bytes from `input` into `output`, writes `tag_len`
+// bytes of authentication tag to `tag`, then wipes the context.
+// `aead`/`aead_len` is the associated data that is authenticated but not
+// encrypted. Returns the result of gcm_crypt_and_tag().
+// NOTE(review): the result of gcm_setkey() is not checked here - confirm
+// whether it can fail for the key lengths in use.
+int aes_gcm_encrypt(unsigned char *output,  //
+                    const unsigned char *input, size_t input_length,
+                    const unsigned char *key, const size_t key_len,
+                    const unsigned char *iv, const size_t iv_len,
+                    unsigned char *aead, size_t aead_len, unsigned char *tag,
+                    const size_t tag_len) {
+  int ret = 0;      // our return value
+  gcm_context ctx;  // includes the AES context structure
+
+  gcm_setkey(&ctx, key, (const uint) key_len);
+
+  ret = gcm_crypt_and_tag(&ctx, ENCRYPT, iv, iv_len, aead, aead_len, input, output,
+                          input_length, tag, tag_len);
+
+  gcm_zero_ctx(&ctx);
+
+  return (ret);
+}
+
+// One-shot AES-GCM decryption of `input_length` bytes from `input` into
+// `output`.
+// NOTE(review): this only reverses the counter-mode encryption. No associated
+// data and no tag are passed (tag_buf is NULL, tag_len is 0), so the
+// authentication tag of the message is NOT verified by this function.
+int aes_gcm_decrypt(unsigned char *output, const unsigned char *input,
+                    size_t input_length, const unsigned char *key,
+                    const size_t key_len, const unsigned char *iv,
+                    const size_t iv_len) {
+  int ret = 0;      // our return value
+  gcm_context ctx;  // includes the AES context structure
+
+  size_t tag_len = 0;
+  unsigned char *tag_buf = NULL;
+
+  gcm_setkey(&ctx, key, (const uint) key_len);
+
+  ret = gcm_crypt_and_tag(&ctx, DECRYPT, iv, iv_len, NULL, 0, input, output,
+                          input_length, tag_buf, tag_len);
+
+  gcm_zero_ctx(&ctx);
+
+  return (ret);
+}
+#endif
+
 #ifdef MG_ENABLE_LINES
 #line 1 "src/tls_builtin.c"
 #endif
 #if MG_TLS == MG_TLS_BUILTIN
-struct tls_data {
-  uint8_t client_random[32];  // From client hello
-  uint8_t client_pub[32];     // From client hello
+
+/* handshake is re-entrant, so we need to keep track of its state */
+enum mg_tls_hs_state {
+  MG_TLS_HS_CLIENT_HELLO,          // first, wait for ClientHello
+  MG_TLS_HS_SERVER_HELLO,          // then, send all server handshake data at once
+  MG_TLS_HS_CLIENT_CHANGE_CIPHER,  // finally wait for ClientChangeCipher
+  MG_TLS_HS_CLIENT_FINISH,         // and ClientFinish (encrypted)
+  MG_TLS_HS_DONE,  // finish handshake, start application data flow
 };
-struct tls_ctx {
-  struct mg_iobuf server_cert;  // Decoded server certificate
-  struct mg_iobuf server_key;   // Decoded server private key
+
+/* per-connection TLS data */
+struct tls_data {
+  enum mg_tls_hs_state state; /* keep track of connection handshake progress */
+
+  struct mg_iobuf send; /* outgoing TLS records not yet written to the socket */
+  struct mg_iobuf recv; /* raw incoming bytes, parsed one TLS record at a time */
+
+  mg_sha256_ctx sha256; /* incremental SHA-256 hash for TLS handshake */
+
+  uint32_t sseq; /* server sequence number, used in encryption */
+  uint32_t cseq; /* client sequence number, used in decryption */
+
+  uint8_t session_id[32]; /* client session ID between the handshake states */
+  uint8_t x25519_cli[32]; /* client X25519 key between the handshake states */
+  uint8_t x25519_sec[32]; /* x25519 secret between the handshake
+                             states */
+
+  struct mg_str server_cert_der; /* server certificate in DER format */
+  uint8_t server_key[32];        /* server EC private key */
+
+  /* keys for AES encryption; the write keys and IVs hold the handshake
+     traffic keys first and are overwritten with the application traffic
+     keys once the handshake completes */
+  uint8_t handshake_secret[32];
+  uint8_t server_write_key[16];
+  uint8_t server_write_iv[12];
+  uint8_t server_finished_key[32];
+  uint8_t client_write_key[16];
+  uint8_t client_write_iv[12];
+  uint8_t client_finished_key[32];
 };
 #define MG_LOAD_BE16(p) ((uint16_t) ((MG_U8P(p)[0] << 8U) | MG_U8P(p)[1]))
 #define TLS_HDR_SIZE 5  // 1 byte type, 2 bytes version, 2 bytes len
-static inline bool mg_is_big_endian(void) {
-  int v = 1;
-  return *(unsigned char *) &v == 1;
-}
-static inline uint16_t mg_swap16(uint16_t v) {
-  return (uint16_t) ((v << 8U) | (v >> 8U));
-}
-static inline uint16_t mg_be16(uint16_t v) {
-  return mg_is_big_endian() ? mg_swap16(v) : v;
-}
-#if 0
-static inline uint32_t mg_swap32(uint32_t v) {
-  return (v >> 24) | (v >> 8 & 0xff00) | (v << 8 & 0xff0000) | (v << 24);
-}
-static inline uint64_t mg_swap64(uint64_t v) {
-  return (((uint64_t) mg_swap32((uint32_t) v)) << 32) |
-         mg_swap32((uint32_t) (v >> 32));
-}
-static inline uint32_t mg_be32(uint32_t v) {
-  return mg_is_big_endian() ? mg_swap32(v) : v;
-}
-#endif
+// TLS 1.3 key schedule constants: `zeros` is the all-zero input keying
+// material, and `zeros_sha256_digest` is the SHA-256 digest of the EMPTY
+// string (e3b0c442...), used as the context of the "derived" label - it is
+// not a hash of the 32 zero bytes
+static uint8_t zeros[32] = {0};
+static uint8_t zeros_sha256_digest[32] =
+    "\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9\x24"
+    "\x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52\xb8\x55";
-static inline void add8(struct mg_iobuf *io, uint8_t data) {
-  mg_iobuf_add(io, io->len, &data, sizeof(data));
+#define X25519_BYTES 32
+// X25519 base point: u = 9, stored as 32 bytes with the low byte first
+const uint8_t X25519_BASE_POINT[X25519_BYTES] = {9};
+
+#define X25519_WBITS 32
+
+// A field element is NLIMBS words of X25519_WBITS bits, least significant
+// word first; dlimb_t/sdlimb_t hold double-width intermediate results
+typedef uint32_t limb_t;
+typedef uint64_t dlimb_t;
+typedef int64_t sdlimb_t;
+#define LIMB(x) (uint32_t)(x##ull), (uint32_t) ((x##ull) >> 32)
+
+#define NLIMBS (256 / X25519_WBITS)
+typedef limb_t fe[NLIMBS];
+
+// Multiply-accumulate: returns the low word of mand * mier + acc + *carry
+// and stores the high word back into *carry
+static limb_t umaal(limb_t *carry, limb_t acc, limb_t mand, limb_t mier) {
+  dlimb_t tmp = (dlimb_t) mand * mier + acc + *carry;
+  *carry = (limb_t) (tmp >> X25519_WBITS);
+  return (limb_t) tmp;
 }
-static inline void add16(struct mg_iobuf *io, uint16_t data) {
-  data = mg_htons(data);
-  mg_iobuf_add(io, io->len, &data, sizeof(data));
+
+/* These functions are implemented in terms of umaal on ARM */
+// Add with carry: returns the low word of acc + mand + *carry, updates *carry
+static limb_t adc(limb_t *carry, limb_t acc, limb_t mand) {
+  dlimb_t total = (dlimb_t) *carry + acc + mand;
+  *carry = (limb_t) (total >> X25519_WBITS);
+  return (limb_t) total;
 }
-static inline void add32(struct mg_iobuf *io, uint32_t data) {
-  data = mg_htonl(data);
-  mg_iobuf_add(io, io->len, &data, sizeof(data));
+
+// Carry propagation step: returns the low word of acc + *carry, updates
+// *carry
+static limb_t adc0(limb_t *carry, limb_t acc) {
+  dlimb_t total = (dlimb_t) *carry + acc;
+  *carry = (limb_t) (total >> X25519_WBITS);
+  return (limb_t) total;
+}
+
+/* Precondition: carry is small.
+ * Invariant: result of propagate is < 2^255 + 1 word
+ * In particular, always less than 2p.
+ * Also, output x >= min(x,19)
+ */
+static void propagate(fe x, limb_t over) {
+  unsigned i;
+  limb_t carry;
+  // fold bit 255 of x together with the overflow word into `over`, clear
+  // bit 255, then add over * 19 back in (2^255 = 19 mod p)
+  over = x[NLIMBS - 1] >> (X25519_WBITS - 1) | over << 1;
+  x[NLIMBS - 1] &= ~((limb_t) 1 << (X25519_WBITS - 1));
+
+  carry = over * 19;
+  for (i = 0; i < NLIMBS; i++) {
+    x[i] = adc0(&carry, x[i]);
+  }
+}
+
+// out = a + b, weakly reduced by propagate()
+static void add(fe out, const fe a, const fe b) {
+  unsigned i;
+  limb_t carry = 0;
+  for (i = 0; i < NLIMBS; i++) {
+    out[i] = adc(&carry, a[i], b[i]);
+  }
+  propagate(out, carry);
+}
+
+// out = a - b, weakly reduced. The -38 start value together with the "1 +"
+// handed to propagate() adds 2^256 - 38 = 2p, which keeps the intermediate
+// value non-negative.
+static void sub(fe out, const fe a, const fe b) {
+  unsigned i;
+  sdlimb_t carry = -38;
+  for (i = 0; i < NLIMBS; i++) {
+    carry = carry + a[i] - b[i];
+    out[i] = (limb_t) carry;
+    carry >>= X25519_WBITS;
+  }
+  propagate(out, (limb_t) (1 + carry));
+}
+
+// out = a * b, where b has `nb` limbs. Schoolbook multiplication into a
+// double-width accumulator, then the upper half is folded in multiplied by
+// 38 (2^256 = 38 mod p). The product is accumulated before `out` is written,
+// so `out` may be the same array as `a` or `b`.
+static void mul(fe out, const fe a, const fe b, unsigned nb) {
+  limb_t accum[2 * NLIMBS] = {0};
+  unsigned i, j;
+
+  limb_t carry2;
+  for (i = 0; i < nb; i++) {
+    limb_t mand = b[i];
+    carry2 = 0;
+    for (j = 0; j < NLIMBS; j++) {
+      accum[i + j] = umaal(&carry2, accum[i + j], mand, a[j]);
+    }
+    accum[i + j] = carry2;
+  }
+
+  carry2 = 0;
+  for (j = 0; j < NLIMBS; j++) {
+    out[j] = umaal(&carry2, accum[j], 38, accum[j + NLIMBS]);
+  }
+  propagate(out, carry2);
+}
+
+// out = a^2
+static void sqr(fe out, const fe a) {
+  mul(out, a, a, NLIMBS);
+}
+// out = out * a
+static void mul1(fe out, const fe a) {
+  mul(out, a, out, NLIMBS);
+}
+// a = a^2
+static void sqr1(fe a) {
+  mul1(a, a);
+}
+
+// Branch-free conditional swap of two pairs of field elements (2 * NLIMBS
+// words each). `doswap` is a mask: all ones swaps, zero leaves both as is.
+static void condswap(limb_t a[2 * NLIMBS], limb_t b[2 * NLIMBS],
+                     limb_t doswap) {
+  unsigned i;
+  for (i = 0; i < 2 * NLIMBS; i++) {
+    limb_t xor = (a[i] ^ b[i]) & doswap;
+    a[i] ^= xor;
+    b[i] ^= xor;
+  }
+}
+
+// Returns an all-ones word when the canonical value of x is zero, and 0
+// otherwise.
+static limb_t canon(fe x) {
+  /* Canonicalize a field element x, reducing it to the least residue
+   * which is congruent to it mod 2^255-19.
+   *
+   * Precondition: x < 2^255 + 1 word
+   */
+
+  /* First, add 19. */
+  unsigned i;
+  limb_t carry0 = 19;
+  limb_t res;
+  sdlimb_t carry;
+  for (i = 0; i < NLIMBS; i++) {
+    x[i] = adc0(&carry0, x[i]);
+  }
+  propagate(x, carry0);
+
+  /* Here, 19 <= x2 < 2^255
+   *
+   * This is because we added 19, so before propagate it can't be less than 19.
+   * After propagate, it still can't be less than 19, because if propagate does
+   * anything it adds 19.
+   *
+   * We know that the high bit must be clear, because either the input was
+   * ~ 2^255 + one word + 19 (in which case it propagates to at most 2 words)
+   * or it was < 2^255.
+   *
+   * So now, if we subtract 19, we will get back to something in [0,2^255-19).
+   */
+  carry = -19;
+  res = 0;
+  for (i = 0; i < NLIMBS; i++) {
+    carry += x[i];
+    res |= x[i] = (limb_t) carry;
+    carry >>= X25519_WBITS;
+  }
+  // res is the OR of all limbs: (res - 1) only borrows into the upper word
+  // when res == 0
+  return (limb_t) (((dlimb_t) res - 1) >> X25519_WBITS);
+}
+
+static const limb_t a24[1] = {121665};
+
+// First half of one Montgomery ladder step; xs = {x2, z2, x3, z3, t1}
+static void ladder_part1(fe xs[5]) {
+  limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4];
+  add(t1, x2, z2);                                 // t1 = A
+  sub(z2, x2, z2);                                 // z2 = B
+  add(x2, x3, z3);                                 // x2 = C
+  sub(z3, x3, z3);                                 // z3 = D
+  mul1(z3, t1);                                    // z3 = DA
+  mul1(x2, z2);                                    // x2 = CB
+  add(x3, z3, x2);                                 // x3 = DA+CB
+  sub(z3, z3, x2);                                 // z3 = DA-CB
+  sqr1(t1);                                        // t1 = AA
+  sqr1(z2);                                        // z2 = BB
+  sub(x2, t1, z2);                                 // x2 = E = AA-BB
+  mul(z2, x2, a24, sizeof(a24) / sizeof(a24[0]));  // z2 = E*a24
+  add(z2, z2, t1);                                 // z2 = E*a24 + AA
+}
+
+// Second half of one ladder step; x1 is the u-coordinate of the input point
+static void ladder_part2(fe xs[5], const fe x1) {
+  limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4];
+  sqr1(z3);         // z3 = (DA-CB)^2
+  mul1(z3, x1);     // z3 = x1 * (DA-CB)^2
+  sqr1(x3);         // x3 = (DA+CB)^2
+  mul1(z2, x2);     // z2 = E*(E*a24+AA)
+  sub(x2, t1, x2);  // x2 = BB again
+  mul1(x2, t1);     // x2 = AA*BB
+}
+
+// Runs the 255-step Montgomery ladder for `scalar` on the point with
+// u-coordinate `x1`, leaving the projective result in xs[0] (x2) and xs[1]
+// (z2). With `clamp` set, the low 3 bits and bit 255 of the scalar are
+// treated as cleared and bit 254 as set, without modifying the caller's
+// buffer.
+// NOTE(review): the bytes of x1 are reinterpreted as limbs with memcpy, which
+// assumes a little-endian target - confirm for big-endian ports.
+static void x25519_core(fe xs[5], const uint8_t scalar[X25519_BYTES],
+                        const uint8_t *x1, int clamp) {
+  int i;
+  limb_t swap = 0;
+  limb_t *x2 = xs[0], *x3 = xs[2], *z3 = xs[3];
+  fe x1_limbs;  // aligned copy of x1 for the field arithmetic
+  memcpy(x1_limbs, x1, sizeof(fe));
+  memset(xs, 0, 4 * sizeof(fe));
+  x2[0] = z3[0] = 1;
+  memcpy(x3, x1, sizeof(fe));
+
+  for (i = 255; i >= 0; i--) {
+    uint8_t bytei = scalar[i / 8];
+    limb_t doswap;
+    if (clamp) {
+      if (i / 8 == 0) {
+        bytei &= (uint8_t) ~7U;
+      } else if (i / 8 == X25519_BYTES - 1) {
+        bytei &= 0x7F;
+        bytei |= 0x40;
+      }
+    }
+    doswap = 0 - (limb_t) ((bytei >> (i % 8)) & 1);  // bit -> all-ones mask
+    condswap(x2, x3, swap ^ doswap);
+    swap = doswap;
+
+    ladder_part1(xs);
+    // pass the aligned copy: casting the byte pointer to limb_t * would be a
+    // misaligned, type-punned access
+    ladder_part2(xs, x1_limbs);
+  }
+  condswap(x2, x3, swap);
+}
+
+// X25519 scalar multiplication: out = scalar * x1 (32-byte u-coordinates).
+// Returns non-zero only when `clamp` is set and the result is all zero;
+// without `clamp` the return value is always 0.
+static int x25519(uint8_t out[X25519_BYTES], const uint8_t scalar[X25519_BYTES],
+                  const uint8_t x1[X25519_BYTES], int clamp) {
+  int i, ret;
+  fe xs[5];
+  fe result;  // aligned scratch for the final product, copied to `out`
+  limb_t *x2, *z2, *z3, *prev;
+  static const struct {
+    uint8_t a, c, n;
+  } steps[13] = {{2, 1, 1},  {2, 1, 1},  {4, 2, 3},  {2, 4, 6},  {3, 1, 1},
+                 {3, 2, 12}, {4, 3, 25}, {2, 3, 25}, {2, 4, 50}, {3, 2, 125},
+                 {3, 1, 2},  {3, 1, 2},  {3, 1, 1}};
+  x25519_core(xs, scalar, x1, clamp);
+
+  /* Precomputed inversion chain */
+  x2 = xs[0];
+  z2 = xs[1];
+  z3 = xs[3];
+
+  prev = z2;
+  for (i = 0; i < 13; i++) {
+    int j;
+    limb_t *a = xs[steps[i].a];
+    for (j = steps[i].n; j > 0; j--) {
+      sqr(a, prev);
+      prev = a;
+    }
+    mul1(a, xs[steps[i].c]);
+  }
+
+  /* Here prev = z3 */
+  /* x2 /= z2 */
+  mul(result, x2, z3, NLIMBS);
+  ret = (int) canon(result);
+  memcpy(out, result, sizeof(result));
+  if (!clamp) ret = 0;
+  return ret;
+}
+
+/* a help to hexdump buffers inline */
+// The dumped buffers include handshake secrets and keys, so this logs at
+// debug level only; input is truncated to what fits the local hex buffer.
+static void mg_tls_hexdump(const char *msg, uint8_t *buf, size_t bufsz) {
+  char p[2048];
+  size_t maxsz = (sizeof(p) - 1) / 2;  // two hex digits per byte plus a NUL
+  if (bufsz > maxsz) bufsz = maxsz;
+  MG_DEBUG(("%s: %s", msg, mg_hex(buf, bufsz, p)));
+}
+
+/* TLS1.3 secret derivation based on the key label */
+// Packs {0, hashsz, labelsz, label, datasz, data, 1} and writes the first
+// `hashsz` bytes of HMAC-SHA256(key, packed) to `hash`.
+// NOTE(review): lengths are not validated (see TODO): `packed` holds 256
+// bytes and `secret` 32, so labelsz + datasz must stay <= 251 and hashsz
+// <= 32.
+static void mg_tls_derive_secret(const char *label, uint8_t *key, size_t keysz,
+                                 uint8_t *data, size_t datasz, uint8_t *hash,
+                                 size_t hashsz) {
+  size_t labelsz = strlen(label);
+  uint8_t secret[32];
+  uint8_t packed[256] = {0, (uint8_t) hashsz, (uint8_t) labelsz};
+  // TODO: assert lengths of label, key, data and hash
+  memmove(packed + 3, label, labelsz);
+  packed[3 + labelsz] = (uint8_t) datasz;
+  memmove(packed + labelsz + 4,
data, datasz);
+  packed[4 + labelsz + datasz] = 1;
+
+  mg_hmac_sha256(secret, key, keysz, packed, 5 + labelsz + datasz);
+  memmove(hash, secret, hashsz);
+}
+
+/* receive as much data as we can, but at least one full TLS record */
+// Returns 0 once the recv buffer starts with a complete record, MG_IO_WAIT
+// when more data is needed, or MG_IO_ERR on a read error.
+static int mg_tls_recv_msg(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *rio = &tls->recv;
+  uint16_t record_len;
+  // Pull data from TCP
+  for (;;) {
+    long n;
+    mg_iobuf_resize(rio, rio->len + 1);  // make room for at least one byte
+    n = mg_io_recv(c, &rio->buf[rio->len], rio->size - rio->len);
+    if (n > 0) {
+      rio->len += (size_t) n;
+    } else if (n == MG_IO_WAIT) {
+      break;
+    } else {
+      if (!c->is_closing) {
+        mg_error(c, "read IO err");
+      }
+      return MG_IO_ERR;
+    }
+  }
+  // Look if we've pulled everything
+  if (rio->len < TLS_HDR_SIZE) return MG_IO_WAIT;
+
+  record_len = MG_LOAD_BE16(rio->buf + 3);
+  if (rio->len < (size_t) TLS_HDR_SIZE + record_len) return MG_IO_WAIT;
+  return 0;
+}
+
+// Remove a single TLS record from the recv buffer
+static void mg_tls_drop_packet(struct mg_iobuf *rio) {
+  // size_t, not uint16_t: payload length plus header can exceed 65535
+  size_t n = (size_t) MG_LOAD_BE16(rio->buf + 3) + TLS_HDR_SIZE;
+  mg_iobuf_del(rio, 0, n);
+}
+
+/* read and parse ClientHello record */
+// Feeds the ClientHello into the handshake transcript hash, remembers the
+// 32-byte session ID and the client's X25519 key share, and removes the
+// record from the recv buffer.
+// Returns 0 on success, a negative value when the record is incomplete
+// (MG_IO_WAIT), on IO error, or when the hello is malformed or carries no
+// X25519 share. Every length comes from the peer, so each offset is checked
+// against the record before it is used.
+static int mg_tls_client_hello(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *rio = &tls->recv;
+  uint8_t session_id_len;
+  uint16_t record_len;
+  uint16_t cipher_suites_len;
+  uint16_t ext_len;
+  uint8_t *ext;
+  size_t j;
+  size_t off;     // read position inside the record, counted from rio->buf
+  size_t reclen;  // record size including the record header
+
+  int r = mg_tls_recv_msg(c);
+  if (r < 0) {
+    return r;
+  }
+  record_len = MG_LOAD_BE16(rio->buf + 3);
+  reclen = (size_t) TLS_HDR_SIZE + record_len;
+  if (rio->buf[0] != 0x16 || record_len < 1 || rio->buf[5] != 0x01) {
+    mg_error(c, "not a hello packet");
+    return -1;
+  }
+  // record header (5) + handshake header (4) + version (2) + random (32)
+  // + session ID length (1)
+  if (reclen < 44) goto fail;
+  // hash this record only, not whatever else may already sit in the buffer
+  mg_sha256_update(&tls->sha256, rio->buf + TLS_HDR_SIZE, record_len);
+  session_id_len = rio->buf[43];
+  off = (size_t) 44 + session_id_len;  // cipher suites length
+  if (off + 2 > reclen) goto fail;
+  if (session_id_len == sizeof(tls->session_id)) {
+    memmove(tls->session_id, rio->buf + 44, session_id_len);
+  } else if (session_id_len != 0) {
+    MG_INFO(("bad session id len"));
+  }
+  cipher_suites_len = MG_LOAD_BE16(rio->buf + off);
+  off += (size_t) 2 + cipher_suites_len;  // compression methods length
+  if (off + 1 > reclen) goto fail;
+  off += (size_t) 1 + rio->buf[off];  // extensions length
+  if (off + 2 > reclen) goto fail;
+  ext_len = MG_LOAD_BE16(rio->buf + off);
+  off += 2;
+  if (off + ext_len > reclen) goto fail;
+  ext = rio->buf + off;
+  // each extension: 2 bytes type, 2 bytes length, data
+  for (j = 0; j + 4 <= ext_len;) {
+    size_t k;
+    size_t key_exchange_len;
+    uint8_t *key_exchange;
+    size_t n = MG_LOAD_BE16(ext + j + 2);
+    if (j + 4 + n > ext_len) break;  // extension runs past the list
+    if (ext[j] != 0x00 || ext[j + 1] != 0x33 ||
+        n < 2) {  // not a key share extension, ignore
+      j += n + 4;
+      continue;
+    }
+    // key_share data: 2 bytes list length (right after the 4-byte extension
+    // header), then entries of 2 bytes group, 2 bytes key length, key
+    key_exchange_len = MG_LOAD_BE16(ext + j + 4);
+    if (key_exchange_len > n - 2) break;
+    key_exchange = ext + j + 6;
+    for (k = 0; k + 4 <= key_exchange_len;) {
+      size_t m = MG_LOAD_BE16(key_exchange + k + 2);
+      if (k + 4 + m > key_exchange_len) break;
+      // group 0x001d is x25519
+      if (m == 32 && key_exchange[k] == 0x00 && key_exchange[k + 1] == 0x1d) {
+        memmove(tls->x25519_cli, key_exchange + k + 4, m);
+        mg_tls_drop_packet(rio);
+        return 0;
+      }
+      k += m + 4;
+    }
+    j += n + 4;
+  }
+fail:
+  mg_error(c, "bad client hello");
+  return -1;
+}
+
+/* put ServerHello record into wio buffer */
+static void mg_tls_server_hello(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *wio = &tls->send;
+
+  uint8_t msg_server_hello[122] =
+      // server hello, tls 1.2
+      "\x02\x00\x00\x76\x03\x03"
+      // random (32 bytes)
+      "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+      "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe"
+      // session ID length + session ID (32 bytes)
+      "\x20"
+      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+      "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+#if defined(CHACHA20) && CHACHA20
+      // TLS_CHACHA20_POLY1305_SHA256 + no compression
+      "\x13\x03\x00"
+#else
+      // TLS_AES_128_GCM_SHA256 + no compression
+      "\x13\x01\x00"
+#endif
+      // extensions + keyshare
+      "\x00\x2e\x00\x33\x00\x24\x00\x1d\x00\x20"
+      // x25519 keyshare
+      "\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab"
+      "\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab"
+      // supported versions (tls1.3 == 0x304)
+      "\x00\x2b\x00\x02\x03\x04";
+
+  // calculate
keyshare
+  uint8_t x25519_pub[X25519_BYTES];
+  uint8_t x25519_prv[X25519_BYTES];
+  mg_random(x25519_prv, sizeof(x25519_prv));
+  x25519(x25519_pub, x25519_prv, X25519_BASE_POINT, 1);
+  // an all-zero shared secret (peer sent a low-order point) must not be
+  // used; x25519() signals it with a non-zero result when clamping is on
+  if (x25519(tls->x25519_sec, x25519_prv, tls->x25519_cli, 1) != 0) {
+    mg_error(c, "bad x25519 key share");
+  }
+  mg_tls_hexdump("x25519 sec", tls->x25519_sec, sizeof(tls->x25519_sec));
+
+  // fill in the gaps: server random + session ID + keyshare.
+  // The random field (offset 6, after the 4-byte handshake header and the
+  // 2-byte version) must be fresh per connection, not the template filler.
+  mg_random(msg_server_hello + 6, 32);
+  memmove(msg_server_hello + 39, tls->session_id, sizeof(tls->session_id));
+  memmove(msg_server_hello + 84, x25519_pub, sizeof(x25519_pub));
+
+  // server hello message
+  mg_iobuf_add(wio, wio->len, "\x16\x03\x03\x00\x7a", 5);
+  mg_iobuf_add(wio, wio->len, msg_server_hello, sizeof(msg_server_hello));
+  mg_sha256_update(&tls->sha256, msg_server_hello, sizeof(msg_server_hello));
+
+  // change cipher message
+  mg_iobuf_add(wio, wio->len, "\x14\x03\x03\x00\x01\x01", 6);
+}
+
+/* at this point we have x25519 shared secret, we can generate a
+ * set of derived handshake encryption keys */
+// Fills handshake_secret and the server/client write key, IV and finished
+// key in the connection's tls_data from the shared secret and the current
+// transcript hash.
+static void mg_tls_generate_handshake_keys(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+
+  mg_sha256_ctx sha256;
+  uint8_t early_secret[32];
+  uint8_t pre_extract_secret[32];
+  uint8_t hello_hash[32];
+  uint8_t server_hs_secret[32];
+  uint8_t client_hs_secret[32];
+
+  mg_hmac_sha256(early_secret, NULL, 0, zeros, sizeof(zeros));
+  mg_tls_derive_secret("tls13 derived", early_secret, 32, zeros_sha256_digest,
+                       32, pre_extract_secret, 32);
+  mg_hmac_sha256(tls->handshake_secret, pre_extract_secret,
+                 sizeof(pre_extract_secret), tls->x25519_sec,
+                 sizeof(tls->x25519_sec));
+  mg_tls_hexdump("hs secret", tls->handshake_secret, 32);
+
+  // mg_sha256_final is not idempotent, need to copy sha256 context to calculate
+  // the digest
+  memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx));
+  mg_sha256_final(hello_hash, &sha256);
+
+  // derive keys needed for the rest of the handshake
+  mg_tls_derive_secret("tls13 s hs traffic", tls->handshake_secret, 32,
+                       hello_hash, 32, server_hs_secret, 32);
+  mg_tls_derive_secret("tls13 key", server_hs_secret, 32, NULL, 0,
+                       tls->server_write_key, 16);
+  mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0,
+                       tls->server_write_iv, 12);
+  mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0,
+                       tls->server_finished_key, 32);
+  mg_tls_hexdump("s hs traffic", server_hs_secret, 32);
+
+  mg_tls_derive_secret("tls13 c hs traffic", tls->handshake_secret, 32,
+                       hello_hash, 32, client_hs_secret, 32);
+  mg_tls_derive_secret("tls13 key", client_hs_secret, 32, NULL, 0,
+                       tls->client_write_key, 16);
+  mg_tls_derive_secret("tls13 iv", client_hs_secret, 32, NULL, 0,
+                       tls->client_write_iv, 12);
+  mg_tls_derive_secret("tls13 finished", client_hs_secret, 32, NULL, 0,
+                       tls->client_finished_key, 32);
+}
+
+/* AES GCM encryption of the message + put encoded data into the write buffer */
+// Appends one encrypted record to tls->send: 5-byte header, then `msg`
+// followed by the `msgtype` byte, encrypted, then the 16-byte GCM tag. The
+// record header doubles as the associated data. Bumps the server sequence
+// number. On allocation failure or an over-long message the connection is
+// put into error state and nothing is appended.
+static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg,
+                           size_t msgsz, uint8_t msgtype) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *wio = &tls->send;
+  uint8_t *outmsg;
+  uint8_t *tag;
+  size_t encsz = msgsz + 16 + 1;
+  uint8_t hdr[5] = {0x17, 0x03, 0x03, (encsz >> 8) & 0xff, encsz & 0xff};
+  uint8_t associated_data[5] = {0x17, 0x03, 0x03, (encsz >> 8) & 0xff,
+                                encsz & 0xff};
+  uint8_t nonce[12];
+
+  // the record length field is 16 bits wide: a longer record would go out
+  // with a truncated length and desynchronise the stream
+  if (encsz > 0xffff) {
+    mg_error(c, "TLS record too big");
+    return;
+  }
+
+  // per-record nonce: write IV XORed with the sequence number
+  memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv));
+  nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U);
+  nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U);
+  nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U);
+  nonce[11] ^= (uint8_t) ((tls->sseq) & 255U);
+
+  gcm_initialize();
+  // reserve room for header and payload up front, and verify that we got
+  // it: the payload is written straight into the buffer below
+  mg_iobuf_resize(wio, wio->len + sizeof(hdr) + encsz);
+  if (wio->size < wio->len + sizeof(hdr) + encsz) {
+    mg_error(c, "tls oom");
+    return;
+  }
+  mg_iobuf_add(wio, wio->len, hdr, sizeof(hdr));
+  outmsg = wio->buf + wio->len;
+  tag = wio->buf + wio->len + msgsz + 1;
+  memmove(outmsg, msg, msgsz);
+  outmsg[msgsz] = msgtype;  // real content type goes inside the ciphertext
+  aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->server_write_key,
+                  sizeof(tls->server_write_key), nonce, sizeof(nonce),
+                  associated_data, sizeof(associated_data), tag, 16);
+  wio->len += encsz;
+  tls->sseq++;
+}
+
+/* read an
encrypted message, decrypt it into read buffer (AES GCM) */
+// Waits for a complete application-data (0x17) record, decrypts it in place
+// and, when the inner content type is application data, copies the payload
+// to `buf`. Alert (0x15) records in front of it are logged and skipped.
+// Returns the payload length (0 for a record of another inner type), or a
+// negative value when no complete record is available yet, on IO error, on
+// an unexpected or too short record, or when the payload does not fit into
+// `bufsz` bytes.
+// NOTE(review): aes_gcm_decrypt() takes no tag, so the 16-byte GCM tag at
+// the end of the record is skipped rather than verified.
+static int mg_tls_recv_decrypt(struct mg_connection *c, void *buf,
+                               size_t bufsz) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *rio = &tls->recv;
+  uint16_t msgsz;
+  uint8_t *msg;
+  uint8_t nonce[12];
+  int r;
+  for (;;) {
+    r = mg_tls_recv_msg(c);
+    if (r < 0) return r;
+    if (rio->buf[0] == 0x17) {
+      break;
+    } else if (rio->buf[0] == 0x15) {
+      MG_INFO(("TLS ALERT packet received"));
+      // drop it, or the same record would be seen again on the next pass
+      // and this loop would never end
+      mg_tls_drop_packet(rio);
+    } else {
+      mg_error(c, "unexpected packet");
+      return -1;
+    }
+  }
+  msgsz = MG_LOAD_BE16(rio->buf + 3);
+  // at least the content type byte and the 16-byte tag must be present
+  if (msgsz < 16 + 1) {
+    mg_error(c, "TLS record too short");
+    return -1;
+  }
+  msg = rio->buf + 5;
+  // per-record nonce: write IV XORed with the sequence number
+  memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv));
+  nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U);
+  nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U);
+  nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U);
+  nonce[11] ^= (uint8_t) ((tls->cseq) & 255U);
+  aes_gcm_decrypt(msg, msg, msgsz - 16, tls->client_write_key,
+                  sizeof(tls->client_write_key), nonce, sizeof(nonce));
+  r = msgsz - 16 - 1;  // payload length: without tag and content type byte
+  if (msg[r] == 0x17) {
+    if (bufsz > 0) {
+      // never write past the caller's buffer
+      if ((size_t) r > bufsz) {
+        mg_error(c, "TLS record too big for buffer");
+        return -1;
+      }
+      memmove(buf, msg, (size_t) r);
+    }
+  } else {
+    r = 0;
+  }
+  tls->cseq++;
+  mg_tls_drop_packet(rio);
+  return r;
+}
+
+// Queues the EncryptedExtensions handshake message (type 0x08) with an empty
+// extension list, and adds it to the transcript hash.
+static void mg_tls_server_extensions(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  // server extensions
+  uint8_t ext[6] = {0x08, 0, 0, 2, 0, 0};
+  mg_sha256_update(&tls->sha256, ext, sizeof(ext));
+  mg_tls_encrypt(c, ext, sizeof(ext), 0x16);
+}
+
+// Queues the Certificate handshake message (type 0x0b) carrying the single
+// DER certificate from tls->server_cert_der, and adds it to the transcript
+// hash. The message is 13 bytes of framing around the certificate.
+static void mg_tls_server_cert(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  // server DER certificate
+  size_t n = tls->server_cert_der.len;
+  uint8_t *cert = calloc(1, 13 + n);
+  if (cert == NULL) {
+    mg_error(c, "tls cert oom");
+    return;
+  }
+  cert[0] = 0x0b;                                // handshake header
+  cert[1] = (uint8_t) (((n + 9) >> 16) & 255U);  // 3 bytes: payload length
+  cert[2] = (uint8_t) (((n + 9) >> 8) & 255U);
+  cert[3] = (uint8_t) ((n + 9) & 255U);
+  cert[4] = 0;                                   // request context
+  cert[5] = (uint8_t) (((n + 5) >> 16) & 255U);  // 3 bytes: cert (s) length
+  cert[6] = (uint8_t) (((n + 5) >> 8) & 255U);
+  cert[7] = (uint8_t) ((n + 5) & 255U);
+  cert[8] =
+      (uint8_t) (((n) >> 16) & 255U);  // 3 bytes: first (and only) cert len
+  cert[9] = (uint8_t) (((n) >> 8) & 255U);
+  cert[10] = (uint8_t) (n & 255U);
+  // bytes 11+ are certificate in DER format
+  memmove(cert + 11, tls->server_cert_der.ptr, n);
+  cert[11 + n] = cert[12 + n] = 0;  // certificate extensions (none)
+  mg_sha256_update(&tls->sha256, cert, 13 + n);
+  mg_tls_encrypt(c, cert, 13 + n, 0x16);
+  free(cert);  // mg_tls_encrypt copied the message into the send buffer
+}
+
+/* type adapter between uECC hash context and our sha256 implementation */
+typedef struct SHA256_HashContext {
+  uECC_HashContext uECC;  // must stay first: the callbacks cast back from it
+  mg_sha256_ctx ctx;
+} SHA256_HashContext;
+
+static void init_SHA256(const uECC_HashContext *base) {
+  SHA256_HashContext *c = (SHA256_HashContext *) base;
+  mg_sha256_init(&c->ctx);
+}
+
+static void update_SHA256(const uECC_HashContext *base, const uint8_t *message,
+                          unsigned message_size) {
+  SHA256_HashContext *c = (SHA256_HashContext *) base;
+  mg_sha256_update(&c->ctx, message, message_size);
+}
+static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) {
+  SHA256_HashContext *c = (SHA256_HashContext *) base;
+  mg_sha256_final(hash_result, &c->ctx);
+}
+
+// Queues the CertificateVerify handshake message: an ECDSA (secp256r1)
+// signature, DER encoded, over the SHA-256 of the signature content built
+// below.
+static void mg_tls_server_verify_ecdsa(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  // server certificate verify packet
+  uint8_t verify[82] = {0x0f, 0x00, 0x00, 0x00, 0x04, 0x03, 0x00, 0x00};
+  size_t sigsz, verifysz = 0;
+  uint8_t hash[32] = {0}, tmp[2 * 32 + 64] = {0};
+  struct SHA256_HashContext ctx = {
+      {&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp},
+      {{0}, 0, 0, {0}}};
+  int neg1, neg2;
+  uint8_t sig[64], sig_content[130] = {0};
+  mg_sha256_ctx sha256;
+  // signature content: 64 bytes of 0x20, the context string, a zero byte,
+  // then the transcript hash. The padding is written with an explicit count
+  // instead of a string literal of blanks, so the hash offset (64 + 34 = 98)
+  // cannot silently drift.
+  memset(sig_content, 0x20, 64);
+  memmove(sig_content + 64, "TLS 1.3, server CertificateVerify", 34);
+  memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx));
+  mg_sha256_final(sig_content + 98, &sha256);
+
+  mg_sha256_init(&sha256);
+  mg_sha256_update(&sha256, sig_content, sizeof(sig_content));
+  mg_sha256_final(hash, &sha256);
+
+  // without a valid signature `sig` holds garbage: fail the connection
+  // instead of sending it
+  if (!uECC_sign_deterministic(tls->server_key, hash, sizeof(hash), &ctx.uECC,
+                               sig, uECC_secp256r1())) {
+    mg_error(c, "ECDSA signing failed");
+    return;
+  }
+
+  // DER-encode (r, s): an INTEGER whose top bit is set needs a leading zero
+  // byte, which is already there because `verify` is zero-initialised.
+  // NOTE(review): leading zero bytes of r or s are not stripped, so the
+  // encoding is not minimal when sig[0] or sig[32] is 0x00.
+  neg1 = !!(sig[0] & 0x80);
+  neg2 = !!(sig[32] & 0x80);
+  verify[8] = 0x30; /* ASN.1 SEQUENCE */
+  verify[9] = (uint8_t) (68 + neg1 + neg2);
+  verify[10] = 0x02; /* ASN.1 INTEGER */
+  verify[11] = (uint8_t) (32 + neg1);
+  memmove(verify + 12 + neg1, sig, 32);
+  verify[12 + 32 + neg1] = 0x02; /* ASN.1 INTEGER */
+  verify[13 + 32 + neg1] = (uint8_t) (32 + neg2);
+  memmove(verify + 14 + 32 + neg1 + neg2, sig + 32, 32);
+
+  sigsz = (size_t) (70 + neg1 + neg2);
+  verifysz = 8U + sigsz;
+  verify[3] = (uint8_t) (sigsz + 4);
+  verify[7] = (uint8_t) sigsz;
+
+  mg_tls_hexdump("verify", verify, verifysz);
+
+  mg_sha256_update(&tls->sha256, verify, verifysz);
+  mg_tls_encrypt(c, verify, verifysz, 0x16);
+}
+
+// Queues the server Finished message (HMAC of the transcript hash under the
+// server finished key), writes everything queued in tls->send to the socket,
+// and only then adds the Finished message to the transcript hash.
+// NOTE(review): the result of mg_io_send() is ignored and the send buffer is
+// emptied regardless, so a partial write would lose handshake data.
+static void mg_tls_server_finish(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *wio = &tls->send;
+  mg_sha256_ctx sha256;
+  uint8_t hash[32];
+  uint8_t finish[36] = {0x14, 0, 0, 32};
+  memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx));
+  mg_sha256_final(hash, &sha256);
+  mg_hmac_sha256(finish + 4, tls->server_finished_key, 32, hash, 32);
+  mg_tls_hexdump("hash", hash, sizeof(hash));
+  mg_tls_hexdump("key", tls->server_finished_key,
+                 sizeof(tls->server_finished_key));
+  mg_tls_encrypt(c, finish, sizeof(finish), 0x16);
+  mg_io_send(c, wio->buf, wio->len);
+  wio->len = 0;
+
+  mg_sha256_update(&tls->sha256, finish, sizeof(finish));
+}
+
+// Waits for the client's ChangeCipherSpec record (type 0x14) and consumes
+// it, skipping alert records in front of it. Returns 0 when consumed, a
+// negative value when no complete record is available yet, on IO error or on
+// any other record type.
+static int mg_tls_client_change_cipher(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  struct mg_iobuf *rio = &tls->recv;
+  for (;;) {
+    int r = mg_tls_recv_msg(c);
+    if (r < 0) return r;
+    if (rio->buf[0] == 0x14) {  // got a ChangeCipher record
+      break;
+    } else if (rio->buf[0] == 0x15) {  // skip Alert records
+      MG_DEBUG(("TLS ALERT packet received"));
+      mg_tls_drop_packet(rio);
+    } else {
+      mg_error(c, "unexpected packet");
+      return -1;
+    }
+  }
+  // consume ChangeCipher packet
+  mg_tls_drop_packet(rio);
+  return 0;
+}
+
+// Reads and decrypts the next encrypted record from the client. The content
+// is discarded: it is not checked to be a Finished message (see TODO).
+// Returns 0 when a record was consumed, -1 otherwise (including "not
+// received yet").
+static int mg_tls_client_finish(struct mg_connection *c) {
+  uint8_t tmp[2048];
+  int n = mg_tls_recv_decrypt(c, tmp, sizeof(tmp));
+  if (n < 0) {
+    return -1;
+  }
+  // TODO: make sure it's a ClientFinish record
+  return 0;
+}
+
+// Replaces the handshake traffic keys and IVs in tls_data with the
+// application traffic ones, derived from the handshake secret and the
+// current transcript hash, and restarts both sequence numbers at 0.
+static void mg_tls_generate_application_keys(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  uint8_t hash[32];
+  uint8_t premaster_secret[32];
+  uint8_t master_secret[32];
+  uint8_t server_secret[32];
+  uint8_t client_secret[32];
+
+  mg_sha256_ctx sha256;
+  // finalise a copy, the running transcript hash must stay usable
+  memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx));
+  mg_sha256_final(hash, &sha256);
+
+  mg_tls_derive_secret("tls13 derived", tls->handshake_secret, 32,
+                       zeros_sha256_digest, 32, premaster_secret, 32);
+  mg_hmac_sha256(master_secret, premaster_secret, 32, zeros, 32);
+
+  mg_tls_derive_secret("tls13 s ap traffic", master_secret, 32, hash, 32,
+                       server_secret, 32);
+  mg_tls_derive_secret("tls13 key", server_secret, 32, NULL, 0,
+                       tls->server_write_key, 16);
+  mg_tls_derive_secret("tls13 iv", server_secret, 32, NULL, 0,
+                       tls->server_write_iv, 12);
+  mg_tls_derive_secret("tls13 c ap traffic", master_secret, 32, hash, 32,
+                       client_secret, 32);
+  mg_tls_derive_secret("tls13 key", client_secret, 32, NULL, 0,
+                       tls->client_write_key, 16);
+  mg_tls_derive_secret("tls13 iv", client_secret, 32, NULL, 0,
+                       tls->client_write_iv, 12);
+
+  tls->sseq = tls->cseq = 0;
+}
+
+// Drives the server side of the handshake. The function is re-entered every
+// time new data arrives: tls->state records the step reached, and each case
+// falls through to the next as long as its input is available.
+void mg_tls_handshake(struct mg_connection *c) {
+  struct tls_data *tls = c->tls;
+  switch (tls->state) {
+    case MG_TLS_HS_CLIENT_HELLO:
+      if (mg_tls_client_hello(c) < 0) {
+        return;
+      }
+      tls->state = MG_TLS_HS_SERVER_HELLO;
+      // fallthrough
+    case MG_TLS_HS_SERVER_HELLO:
+      mg_tls_server_hello(c);
+      mg_tls_generate_handshake_keys(c);
+      mg_tls_server_extensions(c);
+      mg_tls_server_cert(c);
+      mg_tls_server_verify_ecdsa(c);
+      mg_tls_server_finish(c);
+      tls->state = MG_TLS_HS_CLIENT_CHANGE_CIPHER;
+      // fallthrough
+    case MG_TLS_HS_CLIENT_CHANGE_CIPHER:
+      if
(mg_tls_client_change_cipher(c) < 0) {
+        return;
+      }
+      tls->state = MG_TLS_HS_CLIENT_FINISH;
+      // fallthrough
+    case MG_TLS_HS_CLIENT_FINISH:
+      if (mg_tls_client_finish(c) < 0) {
+        return;
+      }
+      mg_tls_generate_application_keys(c);
+      tls->state = MG_TLS_HS_DONE;
+      // fallthrough
+    case MG_TLS_HS_DONE: c->is_tls_hs = 0; return;
+  }
+}
+
+// Converts `pem` to DER and stores the result in a heap buffer in `*der`.
+// Input that does not match the "-----BEGIN x-----...-----END x-----" armor
+// is treated as raw DER and duplicated unchanged. For PEM input both armor
+// labels must equal `label`; the body is stripped of space, tab, CR and LF
+// and base64-decoded in place.
+// Returns 0 on success (the caller must free der->ptr) or -1 on a label
+// mismatch, undecodable base64 or allocation failure. On -1 nothing is left
+// for the caller to free.
+static int mg_parse_pem(const struct mg_str pem, const struct mg_str label,
+                        struct mg_str *der) {
+  size_t n = 0, m = 0;
+  char *s;
+  const char *c;
+  struct mg_str caps[5];
+  if (!mg_match(pem, mg_str("#-----BEGIN #-----#-----END #-----#"), caps)) {
+    *der = mg_strdup(pem);
+    // A NULL copy of non-empty input is treated as an allocation failure
+    // (assumes mg_strdup() signals failure that way) instead of being
+    // reported as success.
+    if (der->ptr == NULL && pem.len > 0) {
+      return -1;
+    }
+    return 0;
+  }
+  if (mg_strcmp(caps[1], label) != 0 || mg_strcmp(caps[3], label) != 0) {
+    return -1;  // bad label
+  }
+  // Explicit cast, like every other allocation in this file
+  if ((s = (char *) calloc(1, caps[2].len)) == NULL) {
+    return -1;
+  }
+
+  for (c = caps[2].ptr; c < caps[2].ptr + caps[2].len; c++) {
+    if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') {
+      continue;
+    }
+    s[n++] = *c;
+  }
+  m = mg_base64_decode(s, n, s, n);
+  if (m == 0) {
+    free(s);
+    return -1;
+  }
+  der->ptr = s;
+  der->len = m;
+  return 0;
 }
+
+// Allocates the per-connection TLS state and loads the server EC private key
+// (opts->key) and certificate (opts->cert), each given as PEM or DER.
+// On success c->tls is set and c->is_tls / c->is_tls_hs are raised.
+// On any failure nothing is attached to `c`, every temporary is released and
+// the connection is failed through mg_error(), so it cannot carry on without
+// TLS.
 void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) {
+  struct mg_str key = {NULL, 0};
+  const char *err = NULL;
   struct tls_data *tls = (struct tls_data *) calloc(1, sizeof(struct tls_data));
-  if (tls != NULL) {
-    // tls->send.align = tls->recv.align = MG_IO_SIZE;
-    c->tls = tls;
-    c->is_tls = c->is_tls_hs = 1;
-  } else {
+  if (tls == NULL) {
     mg_error(c, "tls oom");
+    return;
   }
-  (void) opts;
+  /* parse PEM or DER EC key */
+  if (opts->key.ptr == NULL ||
+      mg_parse_pem(opts->key, mg_str_s("EC PRIVATE KEY"), &key) < 0) {
+    err = "Failed to load EC private key";
+    goto fail;
+  }
+  if (key.len < 39) {
+    err = "EC private key too short";
+    goto fail;
+  }
+  /* expect ASN.1 SEQUENCE=[INTEGER=1, OCTET STRING of 32 bytes, ...] */
+  /* 30 nn 02 01 01 04 20 [key] ... */
+  if (key.ptr[0] != 0x30 || (key.ptr[1] & 0x80) != 0) {
+    err = "EC private key: ASN.1 bad sequence";
+    goto fail;
+  }
+  if (memcmp(key.ptr + 2, "\x02\x01\x01\x04\x20", 5) != 0) {
+    /* Previously fell through and copied the bytes as a key anyway */
+    err = "EC private key: ASN.1 bad data";
+    goto fail;
+  }
+  memmove(tls->server_key, key.ptr + 7, 32);
+  free((void *) key.ptr);
+  key.ptr = NULL; /* the fail path must not free it a second time */
+
+  /* parse PEM or DER certificate */
+  if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->server_cert_der) <
+      0) {
+    err = "Failed to load certificate";
+    goto fail;
+  }
+
+  tls->send.align = tls->recv.align = MG_IO_SIZE;
+  c->tls = tls;
+  c->is_tls = c->is_tls_hs = 1;
+  mg_sha256_init(&tls->sha256);
+  return;
+
+fail:
+  /* mg_parse_pem() hands over no buffer when it fails, so only the decoded
+     key (if any) and the TLS state itself need releasing. */
+  free((void *) key.ptr);
+  free(tls);
+  mg_error(c, "%s", err);
 }
+
+// Releases the TLS state of `c`: both I/O buffers, the certificate copy and
+// the state structure itself. Safe to call when c->tls is NULL.
 void mg_tls_free(struct mg_connection *c) {
   struct tls_data *tls = c->tls;
   if (tls != NULL) {
-    // mg_iobuf_free(&tls->send);
-    // mg_iobuf_free(&tls->recv);
+    mg_iobuf_free(&tls->send);
+    mg_iobuf_free(&tls->recv);
+    free((void *) tls->server_cert_der.ptr);
   }
   free(c->tls);
   c->tls = NULL;
 }
+
 long mg_tls_send(struct mg_connection *c, const void *buf, size_t len) {
-  (void) c, (void) buf, (void) len;
-  // MG_INFO(("BBBBBBBB"));
-  return -1;
-}
-long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) {
-  (void) c, (void) buf, (void) len;
-  char tmp[8192];
-  long n = mg_io_recv(c, tmp, sizeof(tmp));
-  if (n > 0) mg_hexdump(tmp, (size_t) n);
-  MG_INFO(("AAAAAAAA"));
-  return -1;
-  // struct mg_tls *tls = (struct mg_tls *) c->tls;
-  // long n = mbedtls_ssl_read(&tls->ssl, (unsigned char *) buf, len);
-  // if (n == MBEDTLS_ERR_SSL_WANT_READ || n == MBEDTLS_ERR_SSL_WANT_WRITE)
-  // return MG_IO_WAIT;
-  // if (n <= 0) return MG_IO_ERR;
-  // return n;
-}
-size_t mg_tls_pending(struct mg_connection *c) {
-  (void) c;
-  return 0;
-}
-void mg_tls_handshake(struct mg_connection *c) {
-  // struct tls_data *tls = c->tls;
-  struct mg_iobuf *rio = &c->raw;
-  struct mg_iobuf *wio = &c->send;
-
-  // Look if we've pulled everything
-  if (rio->len < TLS_HDR_SIZE) return;
-  uint8_t record_type = rio->buf[0];
-  uint16_t record_len = MG_LOAD_BE16(rio->buf + 3);
-
uint16_t record_version = MG_LOAD_BE16(rio->buf + 1);
-  if (record_type != 22) {
-    mg_error(c, "not a handshake");
-    return;
+  struct tls_data *tls = c->tls;
+  long n = MG_IO_WAIT;
+  // At most 2048 plaintext bytes are taken per call and passed to
+  // mg_tls_encrypt() as record type 0x17; tls->send is then pushed to the
+  // socket until it is empty or mg_io_send() stops accepting data.
+  if (len > 2048) len = 2048;
+  mg_tls_encrypt(c, buf, len, 0x17);
+  while (tls->send.len > 0 &&
+         (n = mg_io_send(c, tls->send.buf, tls->send.len)) > 0) {
+    mg_iobuf_del(&tls->send, 0, (size_t) n);
   }
-  if (rio->len < (size_t) TLS_HDR_SIZE + record_len) return;
-  // Got full hello
-  // struct tls_hello *hello = (struct tls_hello *) (hdr + 1);
-  MG_INFO(("CT=%d V=%hx L=%hu", record_type, record_version, record_len));
-  // mg_hexdump(rio->buf, rio->len);
-
-  // Send response. Server Hello
-  size_t ofs = wio->len;
-  add8(wio, 22), add16(wio, 0x303), add16(wio, 0); // Layer: type, ver, len
-  add8(wio, 2), add8(wio, 0), add16(wio, 0), add16(wio, 0x304); // Hello
-  mg_iobuf_add(wio, wio->len, NULL, 32); // 32 random
-  mg_random(wio->buf + wio->len - 32, 32); // bytes
-  add8(wio, 0); // Session ID
-  add16(wio, 0x1301); // Cipher: TLS_AES_128_GCM_SHA256
-  add8(wio, 0); // Compression method: 0
-  add16(wio, 46); // Extensions length
-  add16(wio, 43), add16(wio, 2), add16(wio, 0x304); // extension: TLS 1.3
-
-  // Key share: use curve x25519 (id 29)
-  add16(wio, 51), add16(wio, 36), add16(wio, 29), add16(wio, 32); // keyshare
-  mg_iobuf_add(wio, wio->len, NULL, 32); // 32 random
-  mg_random(wio->buf + wio->len - 32, 32); // bytes
-  *(uint16_t *) &wio->buf[ofs + 3] = mg_be16((uint16_t) (wio->len - ofs - 5));
-  *(uint16_t *) &wio->buf[ofs + 7] = mg_be16((uint16_t) (wio->len - ofs - 9));
-
-  // Change cipher. Cipher's payload is an encypted app data
-  // ofs = wio->len;
-  add8(wio, 20), add16(wio, 0x303); // Layer: type, version
-  add16(wio, 1), add8(wio, 1);
-
-  ofs = wio->len; // Application data
-  add8(wio, 23), add16(wio, 0x303), add16(wio, 5); // Layer: type, version
-  // mg_iobuf_add(wio, wio->len, "\x01\x02\x03\x04\x05", 5);
-  add8(wio, 22); // handshake message
-  add8(wio, 8); // encrypted extensions
-  add8(wio, 0), add16(wio, 2), add16(wio, 0); // empty 2 bytes
-  add8(wio, 11); // certificate message
-  add8(wio, 0), add16(wio, 4), add32(wio, 0x1020304); // len
-  *(uint16_t *) &wio->buf[ofs + 3] = mg_be16((uint16_t) (wio->len - ofs - 5));
-
-  mg_io_send(c, wio->buf, wio->len);
-  wio->len = 0;
-
-  rio->len = 0;
-  c->is_tls_hs = 0;
-  mg_error(c, "doh");
+  // NOTE(review): when the loop stops early, a negative code is returned even
+  // though `buf` has already been handed to mg_tls_encrypt(). If the caller
+  // retries with the same plaintext it is encrypted and queued again -
+  // confirm how the caller reacts to MG_IO_WAIT. Fixing this needs
+  // per-connection state that lives outside this function.
+  // Every negative mg_io_send() result is propagated; previously only
+  // MG_IO_ERR and MG_IO_WAIT were, and any other failure was reported as
+  // `len` bytes written.
+  if (n < 0) return n;
+  return (long) len;
 }
-void mg_tls_ctx_free(struct mg_mgr *mgr) {
-  free(mgr->tls_ctx);
-  mgr->tls_ctx = NULL;
+
+// Thin wrapper: returns whatever mg_tls_recv_decrypt() returns for buf/len.
+long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) {
+  return mg_tls_recv_decrypt(c, buf, len);
 }
+
+// Number of bytes currently held in tls->recv, or 0 when the connection has
+// no TLS state.
+size_t mg_tls_pending(struct mg_connection *c) {
+  struct tls_data *tls = (struct tls_data *) c->tls;
+  return tls == NULL ? 0 : tls->recv.len;
+}
+
 void mg_tls_ctx_init(struct mg_mgr *mgr) {
   (void) mgr;
 }
+
+// This TLS implementation keeps no manager-wide context; nothing to release.
+void mg_tls_ctx_free(struct mg_mgr *mgr) {
+  (void) mgr;
+}
 #endif
 
 #ifdef MG_ENABLE_LINES
@@ -8151,6 +10080,3183 @@ void mg_tls_ctx_free(struct mg_mgr *mgr) {
 }
 #endif
 
+#ifdef MG_ENABLE_LINES
+#line 1 "src/tls_uecc.c"
+#endif
+/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */
+
+
+
+
+#if MG_TLS == MG_TLS_BUILTIN
+
+#ifndef uECC_RNG_MAX_TRIES
+#define uECC_RNG_MAX_TRIES 64
+#endif
+
+#if uECC_ENABLE_VLI_API
+#define uECC_VLI_API
+#else
+#define uECC_VLI_API static
+#endif
+
+#if (uECC_PLATFORM == uECC_avr) || (uECC_PLATFORM == uECC_arm) || \
+    (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2)
+#define CONCATX(a, ...) a##__VA_ARGS__
+#define CONCAT(a, ...)
CONCATX(a, __VA_ARGS__) + +#define STRX(a) #a +#define STR(a) STRX(a) + +#define EVAL(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) +#define EVAL1(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) +#define EVAL2(...) EVAL3(EVAL3(EVAL3(EVAL3(__VA_ARGS__)))) +#define EVAL3(...) EVAL4(EVAL4(EVAL4(EVAL4(__VA_ARGS__)))) +#define EVAL4(...) __VA_ARGS__ + +#define DEC_1 0 +#define DEC_2 1 +#define DEC_3 2 +#define DEC_4 3 +#define DEC_5 4 +#define DEC_6 5 +#define DEC_7 6 +#define DEC_8 7 +#define DEC_9 8 +#define DEC_10 9 +#define DEC_11 10 +#define DEC_12 11 +#define DEC_13 12 +#define DEC_14 13 +#define DEC_15 14 +#define DEC_16 15 +#define DEC_17 16 +#define DEC_18 17 +#define DEC_19 18 +#define DEC_20 19 +#define DEC_21 20 +#define DEC_22 21 +#define DEC_23 22 +#define DEC_24 23 +#define DEC_25 24 +#define DEC_26 25 +#define DEC_27 26 +#define DEC_28 27 +#define DEC_29 28 +#define DEC_30 29 +#define DEC_31 30 +#define DEC_32 31 + +#define DEC(N) CONCAT(DEC_, N) + +#define SECOND_ARG(_, val, ...) val +#define SOME_CHECK_0 ~, 0 +#define GET_SECOND_ARG(...) SECOND_ARG(__VA_ARGS__, SOME, ) +#define SOME_OR_0(N) GET_SECOND_ARG(CONCAT(SOME_CHECK_, N)) + +#define EMPTY(...) +#define DEFER(...) __VA_ARGS__ EMPTY() + +#define REPEAT_NAME_0() REPEAT_0 +#define REPEAT_NAME_SOME() REPEAT_SOME +#define REPEAT_0(...) +#define REPEAT_SOME(N, stuff) \ + DEFER(CONCAT(REPEAT_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), stuff) stuff +#define REPEAT(N, stuff) EVAL(REPEAT_SOME(N, stuff)) + +#define REPEATM_NAME_0() REPEATM_0 +#define REPEATM_NAME_SOME() REPEATM_SOME +#define REPEATM_0(...) +#define REPEATM_SOME(N, macro) \ + macro(N) DEFER(CONCAT(REPEATM_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), macro) +#define REPEATM(N, macro) EVAL(REPEATM_SOME(N, macro)) +#endif + +// + +#if (uECC_WORD_SIZE == 1) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 21 /* Due to the size of curve_n. 
*/ +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 24 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 28 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 32 +#endif +#elif (uECC_WORD_SIZE == 4) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 6 /* Due to the size of curve_n. */ +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 6 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 7 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 8 +#endif +#elif (uECC_WORD_SIZE == 8) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 3 +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 3 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 4 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 4 +#endif +#endif /* uECC_WORD_SIZE */ + +#define BITS_TO_WORDS(num_bits) \ + ((wordcount_t) ((num_bits + ((uECC_WORD_SIZE * 8) - 1)) / \ + (uECC_WORD_SIZE * 8))) +#define BITS_TO_BYTES(num_bits) ((num_bits + 7) / 8) + +struct uECC_Curve_t { + wordcount_t num_words; + wordcount_t num_bytes; + bitcount_t num_n_bits; + uECC_word_t p[uECC_MAX_WORDS]; + uECC_word_t n[uECC_MAX_WORDS]; + uECC_word_t G[uECC_MAX_WORDS * 2]; + uECC_word_t b[uECC_MAX_WORDS]; + void (*double_jacobian)(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *Z1, + uECC_Curve curve); +#if uECC_SUPPORT_COMPRESSED_POINT + void (*mod_sqrt)(uECC_word_t *a, uECC_Curve curve); +#endif + void (*x_side)(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve); +#if (uECC_OPTIMIZATION_LEVEL > 0) + void (*mmod_fast)(uECC_word_t *result, uECC_word_t *product); +#endif +}; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN +static void bcopy(uint8_t *dst, const 
uint8_t *src, unsigned num_bytes) { + while (0 != num_bytes) { + num_bytes--; + dst[num_bytes] = src[num_bytes]; + } +} +#endif + +static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words); + +#if (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ + uECC_PLATFORM == uECC_arm_thumb2) + +#endif + +#if (uECC_PLATFORM == uECC_avr) + +#endif + +#ifndef asm_clear +#define asm_clear 0 +#endif +#ifndef asm_set +#define asm_set 0 +#endif +#ifndef asm_add +#define asm_add 0 +#endif +#ifndef asm_sub +#define asm_sub 0 +#endif +#ifndef asm_mult +#define asm_mult 0 +#endif +#ifndef asm_rshift1 +#define asm_rshift1 0 +#endif +#ifndef asm_mmod_fast_secp256r1 +#define asm_mmod_fast_secp256r1 0 +#endif + +#if defined(default_RNG_defined) && default_RNG_defined +static uECC_RNG_Function g_rng_function = &default_RNG; +#else +static uECC_RNG_Function g_rng_function = 0; +#endif + +void uECC_set_rng(uECC_RNG_Function rng_function) { + g_rng_function = rng_function; +} + +uECC_RNG_Function uECC_get_rng(void) { + return g_rng_function; +} + +int uECC_curve_private_key_size(uECC_Curve curve) { + return BITS_TO_BYTES(curve->num_n_bits); +} + +int uECC_curve_public_key_size(uECC_Curve curve) { + return 2 * curve->num_bytes; +} + +#if !asm_clear +uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { + wordcount_t i; + for (i = 0; i < num_words; ++i) { + vli[i] = 0; + } +} +#endif /* !asm_clear */ + +/* Constant-time comparison to zero - secure way to compare long integers */ +/* Returns 1 if vli == 0, 0 otherwise. */ +uECC_VLI_API uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, + wordcount_t num_words) { + uECC_word_t bits = 0; + wordcount_t i; + for (i = 0; i < num_words; ++i) { + bits |= vli[i]; + } + return (bits == 0); +} + +/* Returns nonzero if bit 'bit' of vli is set. 
*/
+uECC_VLI_API uECC_word_t uECC_vli_testBit(const uECC_word_t *vli,
+                                          bitcount_t bit) {
+  /* The masked word is returned as is, so the result is not normalised to 1 */
+  return (vli[bit >> uECC_WORD_BITS_SHIFT] &
+          ((uECC_word_t) 1 << (bit & uECC_WORD_BITS_MASK)));
+}
+
+/* Counts the number of words in vli, i.e. the index of the most significant
+   non-zero word plus one; returns 0 when all max_words words are zero. */
+static wordcount_t vli_numDigits(const uECC_word_t *vli,
+                                 const wordcount_t max_words) {
+  wordcount_t i;
+  /* Search from the end until we find a non-zero digit.
+     We do it in reverse because we expect that most digits will be nonzero. */
+  /* NOTE(review): the loop only terminates on an all-zero input if
+     wordcount_t is a signed type (i has to reach -1) - confirm the typedef. */
+  for (i = max_words - 1; i >= 0 && vli[i] == 0; --i) {
+  }
+
+  return (i + 1);
+}
+
+/* Counts the number of bits required to represent vli.
+   Returns 0 for a zero value; otherwise the full width of every word below
+   the top non-zero word plus the bit length of that top word. */
+uECC_VLI_API bitcount_t uECC_vli_numBits(const uECC_word_t *vli,
+                                         const wordcount_t max_words) {
+  uECC_word_t i;
+  uECC_word_t digit;
+
+  wordcount_t num_digits = vli_numDigits(vli, max_words);
+  if (num_digits == 0) {
+    return 0;
+  }
+
+  digit = vli[num_digits - 1];
+  /* Shift the top word right until it is empty; i ends up as its bit length */
+  for (i = 0; digit; ++i) {
+    digit >>= 1;
+  }
+
+  return (((bitcount_t) ((num_digits - 1) << uECC_WORD_BITS_SHIFT)) +
+          (bitcount_t) i);
+}
+
+/* Sets dest = src. Copies num_words words from index 0 upwards. */
+#if !asm_set
+uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src,
+                               wordcount_t num_words) {
+  wordcount_t i;
+  for (i = 0; i < num_words; ++i) {
+    dest[i] = src[i];
+  }
+}
+#endif /* !asm_set */
+
+/* Returns sign of left - right: 1, -1 or 0.
+   Words are compared from the most significant one down and the function
+   returns at the first difference, so its running time depends on the
+   operands. */
+static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left,
+                                       const uECC_word_t *right,
+                                       wordcount_t num_words) {
+  wordcount_t i;
+  for (i = num_words - 1; i >= 0; --i) {
+    if (left[i] > right[i]) {
+      return 1;
+    } else if (left[i] < right[i]) {
+      return -1;
+    }
+  }
+  return 0;
+}
+
+/* Constant-time comparison function - secure way to compare long integers */
+/* Returns one if left == right, zero otherwise.
*/
+uECC_VLI_API uECC_word_t uECC_vli_equal(const uECC_word_t *left,
+                                        const uECC_word_t *right,
+                                        wordcount_t num_words) {
+  uECC_word_t diff = 0;
+  wordcount_t i;
+  /* Every word is visited: differences are OR-ed together, no early exit */
+  for (i = num_words - 1; i >= 0; --i) {
+    diff |= (left[i] ^ right[i]);
+  }
+  return (diff == 0);
+}
+
+/* Forward declaration: uECC_vli_cmp() below needs uECC_vli_sub() */
+uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words);
+
+/* Returns sign of left - right, in constant time.
+   Computes the full difference into a scratch buffer; a borrow means
+   left < right. The result is -1 on borrow, 0 when the difference is zero,
+   and 1 otherwise. */
+uECC_VLI_API cmpresult_t uECC_vli_cmp(const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+  uECC_word_t tmp[uECC_MAX_WORDS];
+  uECC_word_t neg = !!uECC_vli_sub(tmp, left, right, num_words);
+  uECC_word_t equal = uECC_vli_isZero(tmp, num_words);
+  return (cmpresult_t) (!equal - 2 * neg);
+}
+
+/* Computes vli = vli >> 1.
+   Walks from the most significant word down, carrying each word's low bit
+   into the top bit of the word below it. */
+#if !asm_rshift1
+uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) {
+  uECC_word_t *end = vli;
+  uECC_word_t carry = 0;
+
+  vli += num_words;
+  /* NOTE(review): the final post-decrement leaves vli one element before the
+     start of the array; it is never dereferenced there. */
+  while (vli-- > end) {
+    uECC_word_t temp = *vli;
+    *vli = (temp >> 1) | carry;
+    carry = temp << (uECC_WORD_BITS - 1);
+  }
+}
+#endif /* !asm_rshift1 */
+
+/* Computes result = left + right, returning carry. Can modify in place. */
+#if !asm_add
+uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result,
+                                      const uECC_word_t *left,
+                                      const uECC_word_t *right,
+                                      wordcount_t num_words) {
+  uECC_word_t carry = 0;
+  wordcount_t i;
+  for (i = 0; i < num_words; ++i) {
+    uECC_word_t sum = left[i] + right[i] + carry;
+    /* sum == left[i] means right[i] + carry was 0 or wrapped to 0; in both
+       cases the outgoing carry equals the incoming one, so keep it. */
+    if (sum != left[i]) {
+      carry = (sum < left[i]);
+    }
+    result[i] = sum;
+  }
+  return carry;
+}
+#endif /* !asm_add */
+
+/* Computes result = left - right, returning borrow. Can modify in place.
*/ +#if !asm_sub +uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t borrow = 0; + wordcount_t i; + for (i = 0; i < num_words; ++i) { + uECC_word_t diff = left[i] - right[i] - borrow; + if (diff != left[i]) { + borrow = (diff > left[i]); + } + result[i] = diff; + } + return borrow; +} +#endif /* !asm_sub */ + +#if !asm_mult || (uECC_SQUARE_FUNC && !asm_square) || \ + (uECC_SUPPORTS_secp256k1 && (uECC_OPTIMIZATION_LEVEL > 0) && \ + ((uECC_WORD_SIZE == 1) || (uECC_WORD_SIZE == 8))) +static void muladd(uECC_word_t a, uECC_word_t b, uECC_word_t *r0, + uECC_word_t *r1, uECC_word_t *r2) { +#if uECC_WORD_SIZE == 8 + uint64_t a0 = a & 0xffffffff; + uint64_t a1 = a >> 32; + uint64_t b0 = b & 0xffffffff; + uint64_t b1 = b >> 32; + + uint64_t i0 = a0 * b0; + uint64_t i1 = a0 * b1; + uint64_t i2 = a1 * b0; + uint64_t i3 = a1 * b1; + + uint64_t p0, p1; + + i2 += (i0 >> 32); + i2 += i1; + if (i2 < i1) { /* overflow */ + i3 += 0x100000000; + } + + p0 = (i0 & 0xffffffff) | (i2 << 32); + p1 = i3 + (i2 >> 32); + + *r0 += p0; + *r1 += (p1 + (*r0 < p0)); + *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0)); +#else + uECC_dword_t p = (uECC_dword_t) a * b; + uECC_dword_t r01 = ((uECC_dword_t) (*r1) << uECC_WORD_BITS) | *r0; + r01 += p; + *r2 += (r01 < p); + *r1 = (uECC_word_t) (r01 >> uECC_WORD_BITS); + *r0 = (uECC_word_t) r01; +#endif +} +#endif /* muladd needed */ + +#if !asm_mult +uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t i, k; + + /* Compute each digit of result in sequence, maintaining the carries. 
*/ + for (k = 0; k < num_words; ++k) { + for (i = 0; i <= k; ++i) { + muladd(left[i], right[k - i], &r0, &r1, &r2); + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + for (k = num_words; k < num_words * 2 - 1; ++k) { + for (i = (wordcount_t) ((k + 1) - num_words); i < num_words; ++i) { + muladd(left[i], right[k - i], &r0, &r1, &r2); + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + result[num_words * 2 - 1] = r0; +} +#endif /* !asm_mult */ + +#if uECC_SQUARE_FUNC + +#if !asm_square +static void mul2add(uECC_word_t a, uECC_word_t b, uECC_word_t *r0, + uECC_word_t *r1, uECC_word_t *r2) { +#if uECC_WORD_SIZE == 8 + uint64_t a0 = a & 0xffffffffull; + uint64_t a1 = a >> 32; + uint64_t b0 = b & 0xffffffffull; + uint64_t b1 = b >> 32; + + uint64_t i0 = a0 * b0; + uint64_t i1 = a0 * b1; + uint64_t i2 = a1 * b0; + uint64_t i3 = a1 * b1; + + uint64_t p0, p1; + + i2 += (i0 >> 32); + i2 += i1; + if (i2 < i1) { /* overflow */ + i3 += 0x100000000ull; + } + + p0 = (i0 & 0xffffffffull) | (i2 << 32); + p1 = i3 + (i2 >> 32); + + *r2 += (p1 >> 63); + p1 = (p1 << 1) | (p0 >> 63); + p0 <<= 1; + + *r0 += p0; + *r1 += (p1 + (*r0 < p0)); + *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0)); +#else + uECC_dword_t p = (uECC_dword_t) a * b; + uECC_dword_t r01 = ((uECC_dword_t) (*r1) << uECC_WORD_BITS) | *r0; + *r2 += (p >> (uECC_WORD_BITS * 2 - 1)); + p *= 2; + r01 += p; + *r2 += (r01 < p); + *r1 = r01 >> uECC_WORD_BITS; + *r0 = (uECC_word_t) r01; +#endif +} + +uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words) { + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + + wordcount_t i, k; + + for (k = 0; k < num_words * 2 - 1; ++k) { + uECC_word_t min = (k < num_words ? 
0 : (k + 1) - num_words); + for (i = min; i <= k && i <= k - i; ++i) { + if (i < k - i) { + mul2add(left[i], left[k - i], &r0, &r1, &r2); + } else { + muladd(left[i], left[k - i], &r0, &r1, &r2); + } + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + + result[num_words * 2 - 1] = r0; +} +#endif /* !asm_square */ + +#else /* uECC_SQUARE_FUNC */ + +#if uECC_ENABLE_VLI_API +uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words) { + uECC_vli_mult(result, left, left, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +#endif /* uECC_SQUARE_FUNC */ + +/* Computes result = (left + right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +uECC_VLI_API void uECC_vli_modAdd(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t carry = uECC_vli_add(result, left, right, num_words); + if (carry || uECC_vli_cmp_unsafe(mod, result, num_words) != 1) { + /* result > mod (result = mod + remainder), so subtract mod to get + * remainder. */ + uECC_vli_sub(result, result, mod, num_words); + } +} + +/* Computes result = (left - right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +uECC_VLI_API void uECC_vli_modSub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t l_borrow = uECC_vli_sub(result, left, right, num_words); + if (l_borrow) { + /* In this case, result == -diff == (max int) - diff. Since -x % d == d - x, + we can get the correct result from result + mod (with overflow). */ + uECC_vli_add(result, result, mod, num_words); + } +} + +/* Computes result = product % mod, where product is 2N words long. */ +/* Currently only designed to work for curve_p or curve_n. 
*/
+/* Shift-and-subtract reduction: mod is shifted left until its top set bit is
+   at bit position 2 * num_words * uECC_WORD_BITS - 1, then one trial
+   subtraction is made per bit while the shifted modulus is moved back right.
+   `product` is used as one of the two working buffers, so its contents are
+   destroyed. */
+uECC_VLI_API void uECC_vli_mmod(uECC_word_t *result, uECC_word_t *product,
+                                const uECC_word_t *mod, wordcount_t num_words) {
+  uECC_word_t mod_multiple[2 * uECC_MAX_WORDS];
+  uECC_word_t tmp[2 * uECC_MAX_WORDS];
+  /* v[index] holds the current remainder, v[1 - index] receives the trial
+     difference */
+  uECC_word_t *v[2] = {tmp, product};
+  uECC_word_t index;
+
+  /* Shift mod so its highest set bit is at the maximum position. */
+  bitcount_t shift = (bitcount_t) (
+      (num_words * 2 * uECC_WORD_BITS) - uECC_vli_numBits(mod, num_words));
+  wordcount_t word_shift = (wordcount_t) (shift / uECC_WORD_BITS);
+  wordcount_t bit_shift = (wordcount_t) (shift % uECC_WORD_BITS);
+  uECC_word_t carry = 0;
+  uECC_vli_clear(mod_multiple, word_shift);
+  if (bit_shift > 0) {
+    for (index = 0; index < (uECC_word_t) num_words; ++index) {
+      mod_multiple[(uECC_word_t) word_shift + index] =
+          (uECC_word_t) (mod[index] << bit_shift) | carry;
+      carry = mod[index] >> (uECC_WORD_BITS - bit_shift);
+    }
+  } else {
+    uECC_vli_set(mod_multiple + word_shift, mod, num_words);
+  }
+
+  /* NOTE(review): `shift >= 0` ends the loop only if bitcount_t is signed -
+     confirm the typedef. */
+  for (index = 1; shift >= 0; --shift) {
+    uECC_word_t borrow = 0;
+    wordcount_t i;
+    for (i = 0; i < num_words * 2; ++i) {
+      uECC_word_t diff = v[index][i] - mod_multiple[i] - borrow;
+      if (diff != v[index][i]) {
+        borrow = (diff > v[index][i]);
+      }
+      v[1 - index][i] = diff;
+    }
+    index = !(index ^ borrow); /* Swap the index if there was no borrow */
+    /* Shift the 2N-word mod_multiple right by one bit, half by half: the low
+       bit of the upper half is moved into the top bit of the lower half. */
+    uECC_vli_rshift1(mod_multiple, num_words);
+    mod_multiple[num_words - 1] |= mod_multiple[num_words]
+                                   << (uECC_WORD_BITS - 1);
+    uECC_vli_rshift1(mod_multiple + num_words, num_words);
+  }
+  uECC_vli_set(result, v[index], num_words);
+}
+
+/* Computes result = (left * right) % mod.
*/ +uECC_VLI_API void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_mult(product, left, right, num_words); + uECC_vli_mmod(result, product, mod, num_words); +} + +uECC_VLI_API void uECC_vli_modMult_fast(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + uECC_Curve curve) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_mult(product, left, right, curve->num_words); +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} + +#if uECC_SQUARE_FUNC + +#if uECC_ENABLE_VLI_API +/* Computes result = left^2 % mod. */ +uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_square(product, left, num_words); + uECC_vli_mmod(result, product, mod, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, + const uECC_word_t *left, + uECC_Curve curve) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_square(product, left, curve->num_words); +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} + +#else /* uECC_SQUARE_FUNC */ + +#if uECC_ENABLE_VLI_API +uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_vli_modMult(result, left, left, mod, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, + const uECC_word_t *left, + uECC_Curve curve) { + uECC_vli_modMult_fast(result, left, left, curve); +} + +#endif /* uECC_SQUARE_FUNC */ + +#define EVEN(vli) (!(vli[0] & 
1)) +static void vli_modInv_update(uECC_word_t *uv, const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t carry = 0; + if (!EVEN(uv)) { + carry = uECC_vli_add(uv, uv, mod, num_words); + } + uECC_vli_rshift1(uv, num_words); + if (carry) { + uv[num_words - 1] |= HIGH_BIT_SET; + } +} + +/* Computes result = (1 / input) % mod. All VLIs are the same size. + See "From Euclid's GCD to Montgomery Multiplication to the Great Divide" */ +uECC_VLI_API void uECC_vli_modInv(uECC_word_t *result, const uECC_word_t *input, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t a[uECC_MAX_WORDS], b[uECC_MAX_WORDS], u[uECC_MAX_WORDS], + v[uECC_MAX_WORDS]; + cmpresult_t cmpResult; + + if (uECC_vli_isZero(input, num_words)) { + uECC_vli_clear(result, num_words); + return; + } + + uECC_vli_set(a, input, num_words); + uECC_vli_set(b, mod, num_words); + uECC_vli_clear(u, num_words); + u[0] = 1; + uECC_vli_clear(v, num_words); + while ((cmpResult = uECC_vli_cmp_unsafe(a, b, num_words)) != 0) { + if (EVEN(a)) { + uECC_vli_rshift1(a, num_words); + vli_modInv_update(u, mod, num_words); + } else if (EVEN(b)) { + uECC_vli_rshift1(b, num_words); + vli_modInv_update(v, mod, num_words); + } else if (cmpResult > 0) { + uECC_vli_sub(a, a, b, num_words); + uECC_vli_rshift1(a, num_words); + if (uECC_vli_cmp_unsafe(u, v, num_words) < 0) { + uECC_vli_add(u, u, mod, num_words); + } + uECC_vli_sub(u, u, v, num_words); + vli_modInv_update(u, mod, num_words); + } else { + uECC_vli_sub(b, b, a, num_words); + uECC_vli_rshift1(b, num_words); + if (uECC_vli_cmp_unsafe(v, u, num_words) < 0) { + uECC_vli_add(v, v, mod, num_words); + } + uECC_vli_sub(v, v, u, num_words); + vli_modInv_update(v, mod, num_words); + } + } + uECC_vli_set(result, u, num_words); +} + +/* ------ Point operations ------ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ + +#ifndef _UECC_CURVE_SPECIFIC_H_ +#define _UECC_CURVE_SPECIFIC_H_ + +#define num_bytes_secp160r1 20 +#define num_bytes_secp192r1 24 +#define num_bytes_secp224r1 28 +#define num_bytes_secp256r1 32 +#define num_bytes_secp256k1 32 + +#if (uECC_WORD_SIZE == 1) + +#define num_words_secp160r1 20 +#define num_words_secp192r1 24 +#define num_words_secp224r1 28 +#define num_words_secp256r1 32 +#define num_words_secp256k1 32 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) \ + 0x##a, 0x##b, 0x##c, 0x##d, 0x##e, 0x##f, 0x##g, 0x##h +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##a, 0x##b, 0x##c, 0x##d + +#elif (uECC_WORD_SIZE == 4) + +#define num_words_secp160r1 5 +#define num_words_secp192r1 6 +#define num_words_secp224r1 7 +#define num_words_secp256r1 8 +#define num_words_secp256k1 8 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##d##c##b##a, 0x##h##g##f##e +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a + +#elif (uECC_WORD_SIZE == 8) + +#define num_words_secp160r1 3 +#define num_words_secp192r1 3 +#define num_words_secp224r1 4 +#define num_words_secp256r1 4 +#define num_words_secp256k1 4 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##h##g##f##e##d##c##b##a##U +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a##U + +#endif /* uECC_WORD_SIZE */ + +#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \ + uECC_SUPPORTS_secp224r1 || uECC_SUPPORTS_secp256r1 +static void double_jacobian_default(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve) { + /* t1 = X, t2 = Y, t3 = Z */ + uECC_word_t t4[uECC_MAX_WORDS]; + uECC_word_t t5[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + if (uECC_vli_isZero(Z1, num_words)) { + return; + } + + uECC_vli_modSquare_fast(t4, Y1, curve); /* t4 = y1^2 */ + uECC_vli_modMult_fast(t5, X1, t4, curve); /* t5 = x1*y1^2 = A */ + uECC_vli_modSquare_fast(t4, t4, curve); /* t4 = y1^4 */ + uECC_vli_modMult_fast(Y1, Y1, Z1, curve); /* t2 = y1*z1 = z3 */ + uECC_vli_modSquare_fast(Z1, Z1, curve); 
/* t3 = z1^2 */ + + uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = x1 + z1^2 */ + uECC_vli_modAdd(Z1, Z1, Z1, curve->p, num_words); /* t3 = 2*z1^2 */ + uECC_vli_modSub(Z1, X1, Z1, curve->p, num_words); /* t3 = x1 - z1^2 */ + uECC_vli_modMult_fast(X1, X1, Z1, curve); /* t1 = x1^2 - z1^4 */ + + uECC_vli_modAdd(Z1, X1, X1, curve->p, num_words); /* t3 = 2*(x1^2 - z1^4) */ + uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = 3*(x1^2 - z1^4) */ + if (uECC_vli_testBit(X1, 0)) { + uECC_word_t l_carry = uECC_vli_add(X1, X1, curve->p, num_words); + uECC_vli_rshift1(X1, num_words); + X1[num_words - 1] |= l_carry << (uECC_WORD_BITS - 1); + } else { + uECC_vli_rshift1(X1, num_words); + } + /* t1 = 3/2*(x1^2 - z1^4) = B */ + + uECC_vli_modSquare_fast(Z1, X1, curve); /* t3 = B^2 */ + uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - A */ + uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - 2A = x3 */ + uECC_vli_modSub(t5, t5, Z1, curve->p, num_words); /* t5 = A - x3 */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = B * (A - x3) */ + uECC_vli_modSub(t4, X1, t4, curve->p, + num_words); /* t4 = B * (A - x3) - y1^4 = y3 */ + + uECC_vli_set(X1, Z1, num_words); + uECC_vli_set(Z1, Y1, num_words); + uECC_vli_set(Y1, t4, num_words); +} + +/* Computes result = x^3 + ax + b. result must not overlap x. */ +static void x_side_default(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve) { + uECC_word_t _3[uECC_MAX_WORDS] = {3}; /* -a = 3 */ + wordcount_t num_words = curve->num_words; + + uECC_vli_modSquare_fast(result, x, curve); /* r = x^2 */ + uECC_vli_modSub(result, result, _3, curve->p, num_words); /* r = x^2 - 3 */ + uECC_vli_modMult_fast(result, result, x, curve); /* r = x^3 - 3x */ + uECC_vli_modAdd(result, result, curve->b, curve->p, + num_words); /* r = x^3 - 3x + b */ +} +#endif /* uECC_SUPPORTS_secp... 
*/ + +#if uECC_SUPPORT_COMPRESSED_POINT +#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \ + uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1 +/* Compute a = sqrt(a) (mod curve_p). */ +static void mod_sqrt_default(uECC_word_t *a, uECC_Curve curve) { + bitcount_t i; + uECC_word_t p1[uECC_MAX_WORDS] = {1}; + uECC_word_t l_result[uECC_MAX_WORDS] = {1}; + wordcount_t num_words = curve->num_words; + + /* When curve->p == 3 (mod 4), we can compute + sqrt(a) = a^((curve->p + 1) / 4) (mod curve->p). */ + uECC_vli_add(p1, curve->p, p1, num_words); /* p1 = curve_p + 1 */ + for (i = uECC_vli_numBits(p1, num_words) - 1; i > 1; --i) { + uECC_vli_modSquare_fast(l_result, l_result, curve); + if (uECC_vli_testBit(p1, i)) { + uECC_vli_modMult_fast(l_result, l_result, a, curve); + } + } + uECC_vli_set(a, l_result, num_words); +} +#endif /* uECC_SUPPORTS_secp... */ +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +#if uECC_SUPPORTS_secp160r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp160r1 = { + num_words_secp160r1, + num_bytes_secp160r1, + 161, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, 7F, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_4(FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(57, 22, 75, CA, D3, AE, 27, F9), + BYTES_TO_WORDS_8(C8, F4, 01, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 01, 00, 00, 00)}, + {BYTES_TO_WORDS_8(82, FC, CB, 13, B9, 8B, C3, 68), + BYTES_TO_WORDS_8(89, 69, 64, 46, 28, 73, F5, 8E), + BYTES_TO_WORDS_4(68, B5, 96, 4A), + + BYTES_TO_WORDS_8(32, FB, C5, 7A, 37, 51, 23, 04), + BYTES_TO_WORDS_8(12, C9, DC, 59, 7D, 94, 68, 31), + BYTES_TO_WORDS_4(55, 28, A6, 23)}, + {BYTES_TO_WORDS_8(45, FA, 65, C5, AD, D4, D4, 81), + BYTES_TO_WORDS_8(9F, F8, AC, 65, 8B, 7A, BD, 54), + BYTES_TO_WORDS_4(FC, BE, 97, 1C)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + 
&mod_sqrt_default,
+#endif
+  &x_side_default,
+#if (uECC_OPTIMIZATION_LEVEL > 0)
+  &vli_mmod_fast_secp160r1
+#endif
+};
+/* Returns the address of the static secp160r1 curve descriptor. */
+uECC_Curve uECC_secp160r1(void) {
+  return &curve_secp160r1;
+}
+
+#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1)
+/* Computes result = product % curve_p
+    see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354
+
+    Note that this only works if log2(omega) < log2(p) / 2
+
+    The reduction folds the high part of the double-width product back onto
+    the low part twice, each time multiplying it by omega via
+    omega_mult_secp160r1(), and then subtracts p until the value is no
+    longer greater than p.
+
+    result:  output, num_words_secp160r1 words.
+    product: double-width input. It is used as scratch space and is
+             overwritten by this routine. */
+static void omega_mult_secp160r1(uECC_word_t *result, const uECC_word_t *right);
+#if uECC_WORD_SIZE == 8
+static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
+  uECC_word_t tmp[2 * num_words_secp160r1];
+  uECC_word_t copy;
+
+  uECC_vli_clear(tmp, num_words_secp160r1);
+  uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
+
+  omega_mult_secp160r1(tmp,
+                       product + num_words_secp160r1 - 1); /* (Rq, q) = q * c */
+
+  product[num_words_secp160r1 - 1] &= 0xffffffff; /* keep only the low 32 bits of the top low-half word */
+  copy = tmp[num_words_secp160r1 - 1]; /* saved: the upper 32 bits are needed for the second fold */
+  tmp[num_words_secp160r1 - 1] &= 0xffffffff;
+  uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */
+  uECC_vli_clear(product, num_words_secp160r1); /* product's low half is reused as the second fold's output */
+  tmp[num_words_secp160r1 - 1] = copy; /* restore the unmasked word before reading it again */
+  omega_mult_secp160r1(product, tmp + num_words_secp160r1 - 1); /* Rq*c */
+  uECC_vli_add(result, result, product,
+               num_words_secp160r1); /* (C1, r) = r + Rq*c */
+
+  while (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) >
+         0) { /* subtract p while result > p */
+    uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
+  }
+}
+/* 64-bit-word omega multiply: result = (right shifted down by 32 bits) * (2^31 + 1).
+ * Reads right[0 .. num_words_secp160r1] and writes
+ * result[0 .. num_words_secp160r1], i.e. one word more than num_words. */
+static void omega_mult_secp160r1(uint64_t *result, const uint64_t *right) {
+  uint32_t carry;
+  unsigned i;
+
+  /* Multiply by (2^31 + 1). 
*/
+  carry = 0;
+  for (i = 0; i < num_words_secp160r1; ++i) {
+    uint64_t tmp = (right[i] >> 32) | (right[i + 1] << 32); /* 64-bit window starting 32 bits into right[i] */
+    result[i] = (tmp << 31) + tmp + carry;
+    carry = (tmp >> 33) + (result[i] < tmp || (carry && result[i] == tmp)); /* bits shifted out by << 31, plus 1 if the sum wrapped */
+  }
+  result[i] = carry; /* final carry goes into the extra top word */
+}
+#else /* uECC_WORD_SIZE is not 8: same two-fold reduction, with the carries
+       * of both additions counted and p subtracted once per carry. product is
+       * used as scratch space and is overwritten. */
+static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) {
+  uECC_word_t tmp[2 * num_words_secp160r1];
+  uECC_word_t carry;
+
+  uECC_vli_clear(tmp, num_words_secp160r1);
+  uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1);
+
+  omega_mult_secp160r1(tmp,
+                       product + num_words_secp160r1); /* (Rq, q) = q * c */
+
+  carry = uECC_vli_add(result, product, tmp,
+                       num_words_secp160r1); /* (C, r) = r + q */
+  uECC_vli_clear(product, num_words_secp160r1); /* low half of product becomes the second fold's output */
+  omega_mult_secp160r1(product, tmp + num_words_secp160r1); /* Rq*c */
+  carry += uECC_vli_add(result, result, product,
+                        num_words_secp160r1); /* (C1, r) = r + Rq*c */
+
+  while (carry > 0) { /* one subtraction of p per recorded carry */
+    --carry;
+    uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
+  }
+  if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) { /* at most one more, only when result > p */
+    uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1);
+  }
+}
+#endif
+/* omega_mult_secp160r1 for 8-bit and 32-bit words:
+ * result = right * (2^31 + 1). right is copied 32 bits up (times 2^32),
+ * shifted right by one bit (times 2^31, with the bit that falls out placed in
+ * the word just below), then right itself is added and the carry is rippled
+ * upward. Only the words touched here are written, so the caller must pass a
+ * zeroed result buffer (both callers clear it first). */
+#if uECC_WORD_SIZE == 1
+static void omega_mult_secp160r1(uint8_t *result, const uint8_t *right) {
+  uint8_t carry;
+  uint8_t i;
+
+  /* Multiply by (2^31 + 1). */
+  uECC_vli_set(result + 4, right, num_words_secp160r1); /* 2^32 */
+  uECC_vli_rshift1(result + 4, num_words_secp160r1); /* 2^31 */
+  result[3] = right[0] << 7; /* get last bit from shift */
+
+  carry =
+      uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */
+  for (i = num_words_secp160r1; carry; ++i) { /* ripple the carry into the words above */
+    uint16_t sum = (uint16_t) result[i] + carry;
+    result[i] = (uint8_t) sum;
+    carry = sum >> 8;
+  }
+}
+#elif uECC_WORD_SIZE == 4
+static void omega_mult_secp160r1(uint32_t *result, const uint32_t *right) {
+  uint32_t carry;
+  unsigned i;
+
+  /* Multiply by (2^31 + 1). 
*/ + uECC_vli_set(result + 1, right, num_words_secp160r1); /* 2^32 */ + uECC_vli_rshift1(result + 1, num_words_secp160r1); /* 2^31 */ + result[0] = right[0] << 31; /* get last bit from shift */ + + carry = + uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */ + for (i = num_words_secp160r1; carry; ++i) { + uint64_t sum = (uint64_t) result[i] + carry; + result[i] = (uint32_t) sum; + carry = sum >> 32; + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */ + +#endif /* uECC_SUPPORTS_secp160r1 */ + +#if uECC_SUPPORTS_secp192r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp192r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp192r1 = { + num_words_secp192r1, + num_bytes_secp192r1, + 192, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(31, 28, D2, B4, B1, C9, 6B, 14), + BYTES_TO_WORDS_8(36, F8, DE, 99, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(12, 10, FF, 82, FD, 0A, FF, F4), + BYTES_TO_WORDS_8(00, 88, A1, 43, EB, 20, BF, 7C), + BYTES_TO_WORDS_8(F6, 90, 30, B0, 0E, A8, 8D, 18), + + BYTES_TO_WORDS_8(11, 48, 79, 1E, A1, 77, F9, 73), + BYTES_TO_WORDS_8(D5, CD, 24, 6B, ED, 11, 10, 63), + BYTES_TO_WORDS_8(78, DA, C8, FF, 95, 2B, 19, 07)}, + {BYTES_TO_WORDS_8(B1, B9, 46, C1, EC, DE, B8, FE), + BYTES_TO_WORDS_8(49, 30, 24, 72, AB, E9, A7, 0F), + BYTES_TO_WORDS_8(E7, 80, 9C, E5, 19, 05, 21, 64)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp192r1 +#endif +}; + +uECC_Curve uECC_secp192r1(void) { + return &curve_secp192r1; +} + +#if (uECC_OPTIMIZATION_LEVEL > 0) +/* Computes result = product % curve_p. 
+ See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf + */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp192r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp192r1]; + uint8_t carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[24], num_words_secp192r1); + carry = uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] = product[24]; + tmp[9] = product[25]; + tmp[10] = product[26]; + tmp[11] = product[27]; + tmp[12] = product[28]; + tmp[13] = product[29]; + tmp[14] = product[30]; + tmp[15] = product[31]; + tmp[16] = product[32]; + tmp[17] = product[33]; + tmp[18] = product[34]; + tmp[19] = product[35]; + tmp[20] = product[36]; + tmp[21] = product[37]; + tmp[22] = product[38]; + tmp[23] = product[39]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[8] = product[40]; + tmp[1] = tmp[9] = product[41]; + tmp[2] = tmp[10] = product[42]; + tmp[3] = tmp[11] = product[43]; + tmp[4] = tmp[12] = product[44]; + tmp[5] = tmp[13] = product[45]; + tmp[6] = tmp[14] = product[46]; + tmp[7] = tmp[15] = product[47]; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = + tmp[23] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp192r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp192r1]; + int carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[6], num_words_secp192r1); + carry = uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = 0; + tmp[2] = product[6]; + tmp[3] = product[7]; + tmp[4] = 
product[8]; + tmp[5] = product[9]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[2] = product[10]; + tmp[1] = tmp[3] = product[11]; + tmp[4] = tmp[5] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#else +static void vli_mmod_fast_secp192r1(uint64_t *result, uint64_t *product) { + uint64_t tmp[num_words_secp192r1]; + int carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[3], num_words_secp192r1); + carry = (int) uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = 0; + tmp[1] = product[3]; + tmp[2] = product[4]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = product[5]; + tmp[2] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */ + +#endif /* uECC_SUPPORTS_secp192r1 */ + +#if uECC_SUPPORTS_secp224r1 + +#if uECC_SUPPORT_COMPRESSED_POINT +static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve); +#endif +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp224r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp224r1 = { + num_words_secp224r1, + num_bytes_secp224r1, + 224, /* num_n_bits */ + {BYTES_TO_WORDS_8(01, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_4(FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(3D, 2A, 5C, 5C, 45, 29, DD, 13), + BYTES_TO_WORDS_8(3E, F0, B8, E0, A2, 16, FF, 
FF),
+   BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF),
+   BYTES_TO_WORDS_4(FF, FF, FF, FF)},
+  {BYTES_TO_WORDS_8(21, 1D, 5C, 11, D6, 80, 32, 34),
+   BYTES_TO_WORDS_8(22, 11, C2, 56, D3, C1, 03, 4A),
+   BYTES_TO_WORDS_8(B9, 90, 13, 32, 7F, BF, B4, 6B),
+   BYTES_TO_WORDS_4(BD, 0C, 0E, B7),
+
+   BYTES_TO_WORDS_8(34, 7E, 00, 85, 99, 81, D5, 44),
+   BYTES_TO_WORDS_8(64, 47, 07, 5A, A0, 75, 43, CD),
+   BYTES_TO_WORDS_8(E6, DF, 22, 4C, FB, 23, F7, B5),
+   BYTES_TO_WORDS_4(88, 63, 37, BD)},
+  {BYTES_TO_WORDS_8(B4, FF, 55, 23, 43, 39, 0B, 27),
+   BYTES_TO_WORDS_8(BA, D8, BF, D7, B7, B0, 44, 50),
+   BYTES_TO_WORDS_8(56, 32, 41, F5, AB, B3, 04, 0C),
+   BYTES_TO_WORDS_4(85, 0A, 05, B4)},
+  &double_jacobian_default,
+#if uECC_SUPPORT_COMPRESSED_POINT
+  &mod_sqrt_secp224r1, /* secp224r1 uses its own square-root routine instead of mod_sqrt_default */
+#endif
+  &x_side_default,
+#if (uECC_OPTIMIZATION_LEVEL > 0)
+  &vli_mmod_fast_secp224r1
+#endif
+};
+/* Returns the address of the static secp224r1 curve descriptor. */
+uECC_Curve uECC_secp224r1(void) {
+  return &curve_secp224r1;
+}
+
+#if uECC_SUPPORT_COMPRESSED_POINT
+/* Routine 3.2.4 RS; from http://www.nsa.gov/ia/_files/nist-routines.pdf
+ *
+ * One step on the triple (d, e, f), all arithmetic modulo the secp224r1
+ * prime:
+ *   d1 = d0^2 + f0
+ *   e1 = 2 * d0 * e0
+ *   f1 = 4 * d0^2 * f0
+ *
+ * The outputs may alias the corresponding inputs (mod_sqrt_secp224r1_rss
+ * calls this with d1 == d0, e1 == e0, f1 == f0). That works only because of
+ * the statement order below: each output is written after the last read of
+ * the input it may overwrite. Do not reorder these statements. */
+static void mod_sqrt_secp224r1_rs(uECC_word_t *d1, uECC_word_t *e1,
+                                  uECC_word_t *f1, const uECC_word_t *d0,
+                                  const uECC_word_t *e0,
+                                  const uECC_word_t *f0) {
+  uECC_word_t t[num_words_secp224r1];
+
+  uECC_vli_modSquare_fast(t, d0, &curve_secp224r1); /* t <-- d0 ^ 2 */
+  uECC_vli_modMult_fast(e1, d0, e0, &curve_secp224r1); /* e1 <-- d0 * e0 */
+  uECC_vli_modAdd(d1, t, f0, curve_secp224r1.p,
+                  num_words_secp224r1); /* d1 <-- t + f0 */
+  uECC_vli_modAdd(e1, e1, e1, curve_secp224r1.p,
+                  num_words_secp224r1); /* e1 <-- e1 + e1 */
+  uECC_vli_modMult_fast(f1, t, f0, &curve_secp224r1); /* f1 <-- t * f0 */
+  uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p,
+                  num_words_secp224r1); /* f1 <-- f1 + f1 */
+  uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p,
+                  num_words_secp224r1); /* f1 <-- f1 + f1 */
+}
+
+/* Routine 3.2.5 RSS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
+static void mod_sqrt_secp224r1_rss(uECC_word_t *d1, uECC_word_t *e1,
+ 
uECC_word_t *f1, const uECC_word_t *d0,
+                                   const uECC_word_t *e0, const uECC_word_t *f0,
+                                   const bitcount_t j) {
+  bitcount_t i;
+
+  uECC_vli_set(d1, d0, num_words_secp224r1); /* d1 <-- d0 */
+  uECC_vli_set(e1, e0, num_words_secp224r1); /* e1 <-- e0 */
+  uECC_vli_set(f1, f0, num_words_secp224r1); /* f1 <-- f0 */
+  for (i = 1; i <= j; i++) { /* apply RS in place j times */
+    mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */
+  }
+}
+
+/* Routine 3.2.6 RM; from http://www.nsa.gov/ia/_files/nist-routines.pdf
+ *
+ * Combines the pairs (d0, e0) and (d1, e1) using the constant c, all
+ * arithmetic modulo the secp224r1 prime:
+ *   d2 = d0 * d1 - c * e0 * e1
+ *   e2 = d0 * e1 + d1 * e0
+ *   f2 = -c * e2^2
+ *
+ * d2 is assembled in the temporary t2 and stored last, and e2 is first
+ * written only after d0 * e1 has been captured in t1, so the outputs may
+ * alias d0/e0 (mod_sqrt_secp224r1_rp calls this with d2 == d0 and
+ * e2 == e0). Do not reorder these statements. */
+static void mod_sqrt_secp224r1_rm(uECC_word_t *d2, uECC_word_t *e2,
+                                  uECC_word_t *f2, const uECC_word_t *c,
+                                  const uECC_word_t *d0, const uECC_word_t *e0,
+                                  const uECC_word_t *d1,
+                                  const uECC_word_t *e1) {
+  uECC_word_t t1[num_words_secp224r1];
+  uECC_word_t t2[num_words_secp224r1];
+
+  uECC_vli_modMult_fast(t1, e0, e1, &curve_secp224r1); /* t1 <-- e0 * e1 */
+  uECC_vli_modMult_fast(t1, t1, c, &curve_secp224r1); /* t1 <-- t1 * c */
+  /* t1 <-- p - t1 */
+  uECC_vli_modSub(t1, curve_secp224r1.p, t1, curve_secp224r1.p,
+                  num_words_secp224r1);
+  uECC_vli_modMult_fast(t2, d0, d1, &curve_secp224r1); /* t2 <-- d0 * d1 */
+  uECC_vli_modAdd(t2, t2, t1, curve_secp224r1.p,
+                  num_words_secp224r1); /* t2 <-- t2 + t1 */
+  uECC_vli_modMult_fast(t1, d0, e1, &curve_secp224r1); /* t1 <-- d0 * e1 */
+  uECC_vli_modMult_fast(e2, d1, e0, &curve_secp224r1); /* e2 <-- d1 * e0 */
+  uECC_vli_modAdd(e2, e2, t1, curve_secp224r1.p,
+                  num_words_secp224r1); /* e2 <-- e2 + t1 */
+  uECC_vli_modSquare_fast(f2, e2, &curve_secp224r1); /* f2 <-- e2^2 */
+  uECC_vli_modMult_fast(f2, f2, c, &curve_secp224r1); /* f2 <-- f2 * c */
+  /* f2 <-- p - f2 */
+  uECC_vli_modSub(f2, curve_secp224r1.p, f2, curve_secp224r1.p,
+                  num_words_secp224r1);
+  uECC_vli_set(d2, t2, num_words_secp224r1); /* d2 <-- t2 */
+}
+
+/* Routine 3.2.7 RP; from http://www.nsa.gov/ia/_files/nist-routines.pdf */
+static void mod_sqrt_secp224r1_rp(uECC_word_t *d1, uECC_word_t *e1,
+                                  uECC_word_t *f1, const uECC_word_t *c,
+                                  const uECC_word_t 
*r) {
+  wordcount_t i;
+  wordcount_t pow2i = 1; /* RSS repeat count for the current round: 1, 2, 4, ..., 64 */
+  uECC_word_t d0[num_words_secp224r1];
+  uECC_word_t e0[num_words_secp224r1] = {1}; /* e0 <-- 1 */
+  uECC_word_t f0[num_words_secp224r1];
+
+  uECC_vli_set(d0, r, num_words_secp224r1); /* d0 <-- r */
+  /* f0 <-- p - c */
+  uECC_vli_modSub(f0, curve_secp224r1.p, c, curve_secp224r1.p,
+                  num_words_secp224r1);
+  for (i = 0; i <= 6; i++) { /* seven rounds */
+    mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0,
+                           pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */
+    mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0,
+                          e0); /* RM (d1,e1,f1,c,d1,e1,d0,e0) */
+    uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */
+    uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */
+    uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */
+    pow2i *= 2; /* NOTE(review): reaches 128 after the last round; that value is never used, but it would not fit if wordcount_t is a signed 8-bit type - confirm */
+  }
+}
+
+/* Compute a = sqrt(a) (mod curve_p). */
+/* Routine 3.2.8 mp_mod_sqrt_224; from
+ * http://www.nsa.gov/ia/_files/nist-routines.pdf
+ *
+ * a:     in/out field element of num_words_secp224r1 words; overwritten with
+ *        d0 / e0 computed by the iteration below.
+ * curve: ignored; every operation here is hard-wired to curve_secp224r1.
+ *
+ * After RP and one RS step, RS is repeated up to 95 more times, stopping
+ * early once d1 becomes zero. d0/e0 always hold the state from before the
+ * most recent RS step, and the result is formed from those. No check is made
+ * here that the input actually has a square root. */
+static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve) {
+  (void) curve; /* NOTE(review): this statement precedes the declarations below, which compilers in strict C89 mode reject */
+  bitcount_t i;
+  uECC_word_t e1[num_words_secp224r1];
+  uECC_word_t f1[num_words_secp224r1];
+  uECC_word_t d0[num_words_secp224r1];
+  uECC_word_t e0[num_words_secp224r1];
+  uECC_word_t f0[num_words_secp224r1];
+  uECC_word_t d1[num_words_secp224r1];
+
+  /* s = a; using constant instead of random value */
+  mod_sqrt_secp224r1_rp(d0, e0, f0, a, a); /* RP (d0, e0, f0, c, s) */
+  mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0,
+                        f0); /* RS (d1, e1, f1, d0, e0, f0) */
+  for (i = 1; i <= 95; i++) {
+    uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */
+    uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */
+    uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */
+    mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0,
+                          f0); /* RS (d1, e1, f1, d0, e0, f0) */
+    if (uECC_vli_isZero(d1, num_words_secp224r1)) { /* if d1 == 0 */
+      break;
+    }
+  }
+  uECC_vli_modInv(f1, e0, curve_secp224r1.p,
+                  num_words_secp224r1); /* f1 <-- 1 / e0 */
+  uECC_vli_modMult_fast(a, d0, f1, &curve_secp224r1); /* a <-- d0 / e0 
*/ +} +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +#if (uECC_OPTIMIZATION_LEVEL > 0) +/* Computes result = product % curve_p + from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp224r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp224r1]; + int8_t carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0; + tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[28]; + tmp[13] = product[29]; + tmp[14] = product[30]; + tmp[15] = product[31]; + tmp[16] = product[32]; + tmp[17] = product[33]; + tmp[18] = product[34]; + tmp[19] = product[35]; + tmp[20] = product[36]; + tmp[21] = product[37]; + tmp[22] = product[38]; + tmp[23] = product[39]; + tmp[24] = product[40]; + tmp[25] = product[41]; + tmp[26] = product[42]; + tmp[27] = product[43]; + carry = uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[12] = product[44]; + tmp[13] = product[45]; + tmp[14] = product[46]; + tmp[15] = product[47]; + tmp[16] = product[48]; + tmp[17] = product[49]; + tmp[18] = product[50]; + tmp[19] = product[51]; + tmp[20] = product[52]; + tmp[21] = product[53]; + tmp[22] = product[54]; + tmp[23] = product[55]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = product[28]; + tmp[1] = product[29]; + tmp[2] = product[30]; + tmp[3] = product[31]; + tmp[4] = product[32]; + tmp[5] = product[33]; + tmp[6] = product[34]; + tmp[7] = product[35]; + tmp[8] = product[36]; + tmp[9] = product[37]; + tmp[10] = product[38]; + tmp[11] = product[39]; + tmp[12] = product[40]; + tmp[13] = product[41]; + tmp[14] = product[42]; + tmp[15] = product[43]; + tmp[16] = product[44]; + tmp[17] = product[45]; + tmp[18] = product[46]; + tmp[19] = product[47]; + tmp[20] = product[48]; + tmp[21] = product[49]; + tmp[22] = 
product[50]; + tmp[23] = product[51]; + tmp[24] = product[52]; + tmp[25] = product[53]; + tmp[26] = product[54]; + tmp[27] = product[55]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = product[44]; + tmp[1] = product[45]; + tmp[2] = product[46]; + tmp[3] = product[47]; + tmp[4] = product[48]; + tmp[5] = product[49]; + tmp[6] = product[50]; + tmp[7] = product[51]; + tmp[8] = product[52]; + tmp[9] = product[53]; + tmp[10] = product[54]; + tmp[11] = product[55]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp224r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp224r1]; + int carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[3] = product[7]; + tmp[4] = product[8]; + tmp[5] = product[9]; + tmp[6] = product[10]; + carry = uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[3] = product[11]; + tmp[4] = product[12]; + tmp[5] = product[13]; + tmp[6] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = product[7]; + tmp[1] = product[8]; + tmp[2] = product[9]; + tmp[3] = product[10]; + tmp[4] = product[11]; + tmp[5] = product[12]; + tmp[6] = product[13]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = product[11]; + tmp[1] = product[12]; + tmp[2] = 
product[13]; + tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#else +static void vli_mmod_fast_secp224r1(uint64_t *result, uint64_t *product) { + uint64_t tmp[num_words_secp224r1]; + int carry = 0; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + result[num_words_secp224r1 - 1] &= 0xffffffff; + + /* s1 */ + tmp[0] = 0; + tmp[1] = product[3] & 0xffffffff00000000ull; + tmp[2] = product[4]; + tmp[3] = product[5] & 0xffffffff; + uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[1] = product[5] & 0xffffffff00000000ull; + tmp[2] = product[6]; + tmp[3] = 0; + uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = (product[3] >> 32) | (product[4] << 32); + tmp[1] = (product[4] >> 32) | (product[5] << 32); + tmp[2] = (product[5] >> 32) | (product[6] << 32); + tmp[3] = product[6] >> 32; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = product[6] >> 32; + tmp[2] = tmp[3] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */ + +#endif /* uECC_SUPPORTS_secp224r1 */ + +#if uECC_SUPPORTS_secp256r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) 
+static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp256r1 = { + num_words_secp256r1, + num_bytes_secp256r1, + 256, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(01, 00, 00, 00, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(51, 25, 63, FC, C2, CA, B9, F3), + BYTES_TO_WORDS_8(84, 9E, 17, A7, AD, FA, E6, BC), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(96, C2, 98, D8, 45, 39, A1, F4), + BYTES_TO_WORDS_8(A0, 33, EB, 2D, 81, 7D, 03, 77), + BYTES_TO_WORDS_8(F2, 40, A4, 63, E5, E6, BC, F8), + BYTES_TO_WORDS_8(47, 42, 2C, E1, F2, D1, 17, 6B), + + BYTES_TO_WORDS_8(F5, 51, BF, 37, 68, 40, B6, CB), + BYTES_TO_WORDS_8(CE, 5E, 31, 6B, 57, 33, CE, 2B), + BYTES_TO_WORDS_8(16, 9E, 0F, 7C, 4A, EB, E7, 8E), + BYTES_TO_WORDS_8(9B, 7F, 1A, FE, E2, 42, E3, 4F)}, + {BYTES_TO_WORDS_8(4B, 60, D2, 27, 3E, 3C, CE, 3B), + BYTES_TO_WORDS_8(F6, B0, 53, CC, B0, 06, 1D, 65), + BYTES_TO_WORDS_8(BC, 86, 98, 76, 55, BD, EB, B3), + BYTES_TO_WORDS_8(E7, 93, 3A, AA, D8, 35, C6, 5A)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp256r1 +#endif +}; + +uECC_Curve uECC_secp256r1(void) { + return &curve_secp256r1; +} + +#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) +/* Computes result = product % curve_p + from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp256r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp256r1]; + int8_t carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp256r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0; + tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] 
= tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[44]; + tmp[13] = product[45]; + tmp[14] = product[46]; + tmp[15] = product[47]; + tmp[16] = product[48]; + tmp[17] = product[49]; + tmp[18] = product[50]; + tmp[19] = product[51]; + tmp[20] = product[52]; + tmp[21] = product[53]; + tmp[22] = product[54]; + tmp[23] = product[55]; + tmp[24] = product[56]; + tmp[25] = product[57]; + tmp[26] = product[58]; + tmp[27] = product[59]; + tmp[28] = product[60]; + tmp[29] = product[61]; + tmp[30] = product[62]; + tmp[31] = product[63]; + carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s2 */ + tmp[12] = product[48]; + tmp[13] = product[49]; + tmp[14] = product[50]; + tmp[15] = product[51]; + tmp[16] = product[52]; + tmp[17] = product[53]; + tmp[18] = product[54]; + tmp[19] = product[55]; + tmp[20] = product[56]; + tmp[21] = product[57]; + tmp[22] = product[58]; + tmp[23] = product[59]; + tmp[24] = product[60]; + tmp[25] = product[61]; + tmp[26] = product[62]; + tmp[27] = product[63]; + tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0; + carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s3 */ + tmp[0] = product[32]; + tmp[1] = product[33]; + tmp[2] = product[34]; + tmp[3] = product[35]; + tmp[4] = product[36]; + tmp[5] = product[37]; + tmp[6] = product[38]; + tmp[7] = product[39]; + tmp[8] = product[40]; + tmp[9] = product[41]; + tmp[10] = product[42]; + tmp[11] = product[43]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = product[56]; + tmp[25] = product[57]; + tmp[26] = product[58]; + tmp[27] = product[59]; + tmp[28] = product[60]; + tmp[29] = product[61]; + tmp[30] = product[62]; + tmp[31] = product[63]; + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s4 */ + tmp[0] = product[36]; + tmp[1] = 
product[37]; + tmp[2] = product[38]; + tmp[3] = product[39]; + tmp[4] = product[40]; + tmp[5] = product[41]; + tmp[6] = product[42]; + tmp[7] = product[43]; + tmp[8] = product[44]; + tmp[9] = product[45]; + tmp[10] = product[46]; + tmp[11] = product[47]; + tmp[12] = product[52]; + tmp[13] = product[53]; + tmp[14] = product[54]; + tmp[15] = product[55]; + tmp[16] = product[56]; + tmp[17] = product[57]; + tmp[18] = product[58]; + tmp[19] = product[59]; + tmp[20] = product[60]; + tmp[21] = product[61]; + tmp[22] = product[62]; + tmp[23] = product[63]; + tmp[24] = product[52]; + tmp[25] = product[53]; + tmp[26] = product[54]; + tmp[27] = product[55]; + tmp[28] = product[32]; + tmp[29] = product[33]; + tmp[30] = product[34]; + tmp[31] = product[35]; + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = product[44]; + tmp[1] = product[45]; + tmp[2] = product[46]; + tmp[3] = product[47]; + tmp[4] = product[48]; + tmp[5] = product[49]; + tmp[6] = product[50]; + tmp[7] = product[51]; + tmp[8] = product[52]; + tmp[9] = product[53]; + tmp[10] = product[54]; + tmp[11] = product[55]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = product[32]; + tmp[25] = product[33]; + tmp[26] = product[34]; + tmp[27] = product[35]; + tmp[28] = product[40]; + tmp[29] = product[41]; + tmp[30] = product[42]; + tmp[31] = product[43]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[48]; + tmp[1] = product[49]; + tmp[2] = product[50]; + tmp[3] = product[51]; + tmp[4] = product[52]; + tmp[5] = product[53]; + tmp[6] = product[54]; + tmp[7] = product[55]; + tmp[8] = product[56]; + tmp[9] = product[57]; + tmp[10] = product[58]; + tmp[11] = product[59]; + tmp[12] = product[60]; + tmp[13] = product[61]; + tmp[14] = product[62]; + tmp[15] = product[63]; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = 
tmp[22] = tmp[23] = 0; + tmp[24] = product[36]; + tmp[25] = product[37]; + tmp[26] = product[38]; + tmp[27] = product[39]; + tmp[28] = product[44]; + tmp[29] = product[45]; + tmp[30] = product[46]; + tmp[31] = product[47]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 */ + tmp[0] = product[52]; + tmp[1] = product[53]; + tmp[2] = product[54]; + tmp[3] = product[55]; + tmp[4] = product[56]; + tmp[5] = product[57]; + tmp[6] = product[58]; + tmp[7] = product[59]; + tmp[8] = product[60]; + tmp[9] = product[61]; + tmp[10] = product[62]; + tmp[11] = product[63]; + tmp[12] = product[32]; + tmp[13] = product[33]; + tmp[14] = product[34]; + tmp[15] = product[35]; + tmp[16] = product[36]; + tmp[17] = product[37]; + tmp[18] = product[38]; + tmp[19] = product[39]; + tmp[20] = product[40]; + tmp[21] = product[41]; + tmp[22] = product[42]; + tmp[23] = product[43]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + tmp[28] = product[48]; + tmp[29] = product[49]; + tmp[30] = product[50]; + tmp[31] = product[51]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d4 */ + tmp[0] = product[56]; + tmp[1] = product[57]; + tmp[2] = product[58]; + tmp[3] = product[59]; + tmp[4] = product[60]; + tmp[5] = product[61]; + tmp[6] = product[62]; + tmp[7] = product[63]; + tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[36]; + tmp[13] = product[37]; + tmp[14] = product[38]; + tmp[15] = product[39]; + tmp[16] = product[40]; + tmp[17] = product[41]; + tmp[18] = product[42]; + tmp[19] = product[43]; + tmp[20] = product[44]; + tmp[21] = product[45]; + tmp[22] = product[46]; + tmp[23] = product[47]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + tmp[28] = product[52]; + tmp[29] = product[53]; + tmp[30] = product[54]; + tmp[31] = product[55]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1); + } while (carry < 0); 
+ } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, + num_words_secp256r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1); + } + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp256r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp256r1]; + int carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp256r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[3] = product[11]; + tmp[4] = product[12]; + tmp[5] = product[13]; + tmp[6] = product[14]; + tmp[7] = product[15]; + carry = (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s2 */ + tmp[3] = product[12]; + tmp[4] = product[13]; + tmp[5] = product[14]; + tmp[6] = product[15]; + tmp[7] = 0; + carry += (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s3 */ + tmp[0] = product[8]; + tmp[1] = product[9]; + tmp[2] = product[10]; + tmp[3] = tmp[4] = tmp[5] = 0; + tmp[6] = product[14]; + tmp[7] = product[15]; + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s4 */ + tmp[0] = product[9]; + tmp[1] = product[10]; + tmp[2] = product[11]; + tmp[3] = product[13]; + tmp[4] = product[14]; + tmp[5] = product[15]; + tmp[6] = product[13]; + tmp[7] = product[8]; + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = product[11]; + tmp[1] = product[12]; + tmp[2] = product[13]; + tmp[3] = tmp[4] = tmp[5] = 0; + tmp[6] = product[8]; + tmp[7] = product[10]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[12]; + tmp[1] = product[13]; + tmp[2] = product[14]; + tmp[3] = product[15]; + tmp[4] = tmp[5] = 0; + tmp[6] = product[9]; + tmp[7] = product[11]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 
*/
+  tmp[0] = product[13];
+  tmp[1] = product[14];
+  tmp[2] = product[15];
+  tmp[3] = product[8];
+  tmp[4] = product[9];
+  tmp[5] = product[10];
+  tmp[6] = 0;
+  tmp[7] = product[12];
+  carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1);
+
+  /* d4 */
+  tmp[0] = product[14];
+  tmp[1] = product[15];
+  tmp[2] = 0;
+  tmp[3] = product[9];
+  tmp[4] = product[10];
+  tmp[5] = product[11];
+  tmp[6] = 0;
+  tmp[7] = product[13];
+  carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1);
+
+  if (carry < 0) { /* net borrow: add p back until the borrow is cancelled */
+    do {
+      carry +=
+          (int) uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1);
+    } while (carry < 0);
+  } else { /* net carry (or none): subtract p until no carry is left and p > result */
+    while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result,
+                                        num_words_secp256r1) != 1) {
+      carry -=
+          (int) uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1);
+    }
+  }
+}
+#else /* remaining word size: 64-bit words. Same sequence of terms as the
+       * 32-bit variant above, with each 64-bit word holding two 32-bit limbs.
+       * In the comments below cK is the K-th 32-bit limb of product (c0 least
+       * significant), so product[4] = (c8, c9) ... product[7] = (c14, c15),
+       * and terms are listed least-significant limb first. `carry` is the
+       * signed count of carries minus borrows over all additions and
+       * subtractions; it drives the final correction by p. */
+static void vli_mmod_fast_secp256r1(uint64_t *result, uint64_t *product) {
+  uint64_t tmp[num_words_secp256r1];
+  int carry;
+
+  /* t */
+  uECC_vli_set(result, product, num_words_secp256r1);
+
+  /* s1 = (0, 0, 0, c11, c12, c13, c14, c15); doubled, then added */
+  tmp[0] = 0;
+  tmp[1] = product[5] & 0xffffffff00000000U;
+  tmp[2] = product[6];
+  tmp[3] = product[7];
+  carry = (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
+  carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1);
+
+  /* s2 = (0, 0, 0, c12, c13, c14, c15, 0); doubled, then added.
+     tmp[0] is not rewritten: it is still 0 from s1. */
+  tmp[1] = product[6] << 32;
+  tmp[2] = (product[6] >> 32) | (product[7] << 32);
+  tmp[3] = product[7] >> 32;
+  carry += (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1);
+  carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1);
+
+  /* s3 = (c8, c9, c10, 0, 0, 0, c14, c15) */
+  tmp[0] = product[4];
+  tmp[1] = product[5] & 0xffffffff;
+  tmp[2] = 0;
+  tmp[3] = product[7];
+  carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1);
+
+  /* s4 = (c9, c10, c11, c13, c14, c15, c13, c8) */
+  tmp[0] = (product[4] >> 32) | (product[5] << 32);
+  tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000U);
+  tmp[2] = product[7];
+  tmp[3] = (product[6] >> 32) | (product[4] << 32);
+  carry += (int) 
uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = (product[6] >> 32); + tmp[2] = 0; + tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[6]; + tmp[1] = product[7]; + tmp[2] = 0; + tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 */ + tmp[0] = (product[6] >> 32) | (product[7] << 32); + tmp[1] = (product[7] >> 32) | (product[4] << 32); + tmp[2] = (product[4] >> 32) | (product[5] << 32); + tmp[3] = (product[6] << 32); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d4 */ + tmp[0] = product[7]; + tmp[1] = product[4] & 0xffffffff00000000U; + tmp[2] = product[5]; + tmp[3] = product[6] & 0xffffffff00000000U; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + if (carry < 0) { + do { + carry += + (int) uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, + num_words_secp256r1) != 1) { + carry -= + (int) uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1); + } + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */ + +#endif /* uECC_SUPPORTS_secp256r1 */ + +#if uECC_SUPPORTS_secp256k1 + +static void double_jacobian_secp256k1(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve); +static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve); +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp256k1 = { + num_words_secp256k1, + num_bytes_secp256k1, + 256, /* num_n_bits */ + 
{BYTES_TO_WORDS_8(2F, FC, FF, FF, FE, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(41, 41, 36, D0, 8C, 5E, D2, BF), + BYTES_TO_WORDS_8(3B, A0, 48, AF, E6, DC, AE, BA), + BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(98, 17, F8, 16, 5B, 81, F2, 59), + BYTES_TO_WORDS_8(D9, 28, CE, 2D, DB, FC, 9B, 02), + BYTES_TO_WORDS_8(07, 0B, 87, CE, 95, 62, A0, 55), + BYTES_TO_WORDS_8(AC, BB, DC, F9, 7E, 66, BE, 79), + + BYTES_TO_WORDS_8(B8, D4, 10, FB, 8F, D0, 47, 9C), + BYTES_TO_WORDS_8(19, 54, 85, A6, 48, B4, 17, FD), + BYTES_TO_WORDS_8(A8, 08, 11, 0E, FC, FB, A4, 5D), + BYTES_TO_WORDS_8(65, C4, A3, 26, 77, DA, 3A, 48)}, + {BYTES_TO_WORDS_8(07, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00)}, + &double_jacobian_secp256k1, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_secp256k1, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp256k1 +#endif +}; + +uECC_Curve uECC_secp256k1(void) { + return &curve_secp256k1; +} + +/* Double in place */ +static void double_jacobian_secp256k1(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve) { + /* t1 = X, t2 = Y, t3 = Z */ + uECC_word_t t4[num_words_secp256k1]; + uECC_word_t t5[num_words_secp256k1]; + + if (uECC_vli_isZero(Z1, num_words_secp256k1)) { + return; + } + + uECC_vli_modSquare_fast(t5, Y1, curve); /* t5 = y1^2 */ + uECC_vli_modMult_fast(t4, X1, t5, curve); /* t4 = x1*y1^2 = A */ + uECC_vli_modSquare_fast(X1, X1, curve); /* t1 = x1^2 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = y1^4 */ + uECC_vli_modMult_fast(Z1, Y1, Z1, curve); /* t3 = y1*z1 = z3 */ + + uECC_vli_modAdd(Y1, X1, X1, curve->p, num_words_secp256k1); /* t2 = 
2*x1^2 */ + uECC_vli_modAdd(Y1, Y1, X1, curve->p, num_words_secp256k1); /* t2 = 3*x1^2 */ + if (uECC_vli_testBit(Y1, 0)) { + uECC_word_t carry = uECC_vli_add(Y1, Y1, curve->p, num_words_secp256k1); + uECC_vli_rshift1(Y1, num_words_secp256k1); + Y1[num_words_secp256k1 - 1] |= carry << (uECC_WORD_BITS - 1); + } else { + uECC_vli_rshift1(Y1, num_words_secp256k1); + } + /* t2 = 3/2*(x1^2) = B */ + + uECC_vli_modSquare_fast(X1, Y1, curve); /* t1 = B^2 */ + uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - A */ + uECC_vli_modSub(X1, X1, t4, curve->p, + num_words_secp256k1); /* t1 = B^2 - 2A = x3 */ + + uECC_vli_modSub(t4, t4, X1, curve->p, num_words_secp256k1); /* t4 = A - x3 */ + uECC_vli_modMult_fast(Y1, Y1, t4, curve); /* t2 = B * (A - x3) */ + uECC_vli_modSub(Y1, Y1, t5, curve->p, + num_words_secp256k1); /* t2 = B * (A - x3) - y1^4 = y3 */ +} + +/* Computes result = x^3 + b. result must not overlap x. */ +static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve) { + uECC_vli_modSquare_fast(result, x, curve); /* r = x^2 */ + uECC_vli_modMult_fast(result, result, x, curve); /* r = x^3 */ + uECC_vli_modAdd(result, result, curve->b, curve->p, + num_words_secp256k1); /* r = x^3 + b */ +} + +#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1) +static void omega_mult_secp256k1(uECC_word_t *result, const uECC_word_t *right); +static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { + uECC_word_t tmp[2 * num_words_secp256k1]; + uECC_word_t carry; + + uECC_vli_clear(tmp, num_words_secp256k1); + uECC_vli_clear(tmp + num_words_secp256k1, num_words_secp256k1); + + omega_mult_secp256k1(tmp, + product + num_words_secp256k1); /* (Rq, q) = q * c */ + + carry = uECC_vli_add(result, product, tmp, + num_words_secp256k1); /* (C, r) = r + q */ + uECC_vli_clear(product, num_words_secp256k1); + omega_mult_secp256k1(product, tmp + num_words_secp256k1); /* Rq*c */ + carry += uECC_vli_add(result, 
result, product, + num_words_secp256k1); /* (C1, r) = r + Rq*c */ + + while (carry > 0) { + --carry; + uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); + } + if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, num_words_secp256k1) > 0) { + uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); + } +} + +#if uECC_WORD_SIZE == 1 +static void omega_mult_secp256k1(uint8_t *result, const uint8_t *right) { + /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t k; + + /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + muladd(0xD1, right[0], &r0, &r1, &r2); + result[0] = r0; + r0 = r1; + r1 = r2; + /* r2 is still 0 */ + + for (k = 1; k < num_words_secp256k1; ++k) { + muladd(0x03, right[k - 1], &r0, &r1, &r2); + muladd(0xD1, right[k], &r0, &r1, &r2); + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + muladd(0x03, right[num_words_secp256k1 - 1], &r0, &r1, &r2); + result[num_words_secp256k1] = r0; + result[num_words_secp256k1 + 1] = r1; + /* add the 2^32 multiple */ + result[4 + num_words_secp256k1] = + uECC_vli_add(result + 4, result + 4, right, num_words_secp256k1); +} +#elif uECC_WORD_SIZE == 4 +static void omega_mult_secp256k1(uint32_t *result, const uint32_t *right) { + /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + uint32_t carry = 0; + wordcount_t k; + + for (k = 0; k < num_words_secp256k1; ++k) { + uint64_t p = (uint64_t) 0x3D1 * right[k] + carry; + result[k] = (uint32_t) p; + carry = p >> 32; + } + result[num_words_secp256k1] = carry; + /* add the 2^32 multiple */ + result[1 + num_words_secp256k1] = + uECC_vli_add(result + 1, result + 1, right, num_words_secp256k1); +} +#else +static void omega_mult_secp256k1(uint64_t *result, const uint64_t *right) { + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t k; + + /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). 
*/ + for (k = 0; k < num_words_secp256k1; ++k) { + muladd(0x1000003D1ull, right[k], &r0, &r1, &r2); + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + result[num_words_secp256k1] = r0; +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && && !asm_mmod_fast_secp256k1) */ + +#endif /* uECC_SUPPORTS_secp256k1 */ + +#endif /* _UECC_CURVE_SPECIFIC_H_ */ + +/* Returns 1 if 'point' is the point at infinity, 0 otherwise. */ +#define EccPoint_isZero(point, curve) \ + uECC_vli_isZero((point), (wordcount_t) ((curve)->num_words * 2)) + +/* Point multiplication algorithm using Montgomery's ladder with co-Z +coordinates. From http://eprint.iacr.org/2011/338.pdf +*/ + +/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ +static void apply_z(uECC_word_t *X1, uECC_word_t *Y1, + const uECC_word_t *const Z, uECC_Curve curve) { + uECC_word_t t1[uECC_MAX_WORDS]; + + uECC_vli_modSquare_fast(t1, Z, curve); /* z^2 */ + uECC_vli_modMult_fast(X1, X1, t1, curve); /* x1 * z^2 */ + uECC_vli_modMult_fast(t1, t1, Z, curve); /* z^3 */ + uECC_vli_modMult_fast(Y1, Y1, t1, curve); /* y1 * z^3 */ +} + +/* P = (x1, y1) => 2P, (x2, y2) => P' */ +static void XYcZ_initial_double(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *X2, uECC_word_t *Y2, + const uECC_word_t *const initial_Z, + uECC_Curve curve) { + uECC_word_t z[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + if (initial_Z) { + uECC_vli_set(z, initial_Z, num_words); + } else { + uECC_vli_clear(z, num_words); + z[0] = 1; + } + + uECC_vli_set(X2, X1, num_words); + uECC_vli_set(Y2, Y1, num_words); + + apply_z(X1, Y1, z, curve); + curve->double_jacobian(X1, Y1, z, curve); + apply_z(X2, Y2, z, curve); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) + or P => P', Q => P + Q +*/ +static void XYcZ_add(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *X2, + uECC_word_t *Y2, uECC_Curve curve) { + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + uECC_word_t t5[uECC_MAX_WORDS] = 
{0}; + wordcount_t num_words = curve->num_words; + + uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = (x2 - x1)^2 = A */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = x1*A = B */ + uECC_vli_modMult_fast(X2, X2, t5, curve); /* t3 = x2*A = C */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */ + uECC_vli_modSquare_fast(t5, Y2, curve); /* t5 = (y2 - y1)^2 = D */ + + uECC_vli_modSub(t5, t5, X1, curve->p, num_words); /* t5 = D - B */ + uECC_vli_modSub(t5, t5, X2, curve->p, num_words); /* t5 = D - B - C = x3 */ + uECC_vli_modSub(X2, X2, X1, curve->p, num_words); /* t3 = C - B */ + uECC_vli_modMult_fast(Y1, Y1, X2, curve); /* t2 = y1*(C - B) */ + uECC_vli_modSub(X2, X1, t5, curve->p, num_words); /* t3 = B - x3 */ + uECC_vli_modMult_fast(Y2, Y2, X2, curve); /* t4 = (y2 - y1)*(B - x3) */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y3 */ + + uECC_vli_set(X2, t5, num_words); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) + or P => P - Q, Q => P + Q +*/ +static void XYcZ_addC(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *X2, + uECC_word_t *Y2, uECC_Curve curve) { + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + uECC_word_t t5[uECC_MAX_WORDS] = {0}; + uECC_word_t t6[uECC_MAX_WORDS]; + uECC_word_t t7[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = (x2 - x1)^2 = A */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = x1*A = B */ + uECC_vli_modMult_fast(X2, X2, t5, curve); /* t3 = x2*A = C */ + uECC_vli_modAdd(t5, Y2, Y1, curve->p, num_words); /* t5 = y2 + y1 */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */ + + uECC_vli_modSub(t6, X2, X1, curve->p, num_words); /* t6 = C - B */ + uECC_vli_modMult_fast(Y1, Y1, t6, curve); /* t2 = y1 * (C - B) = E */ + 
uECC_vli_modAdd(t6, X1, X2, curve->p, num_words); /* t6 = B + C */ + uECC_vli_modSquare_fast(X2, Y2, curve); /* t3 = (y2 - y1)^2 = D */ + uECC_vli_modSub(X2, X2, t6, curve->p, num_words); /* t3 = D - (B + C) = x3 */ + + uECC_vli_modSub(t7, X1, X2, curve->p, num_words); /* t7 = B - x3 */ + uECC_vli_modMult_fast(Y2, Y2, t7, curve); /* t4 = (y2 - y1)*(B - x3) */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, + num_words); /* t4 = (y2 - y1)*(B - x3) - E = y3 */ + + uECC_vli_modSquare_fast(t7, t5, curve); /* t7 = (y2 + y1)^2 = F */ + uECC_vli_modSub(t7, t7, t6, curve->p, num_words); /* t7 = F - (B + C) = x3' */ + uECC_vli_modSub(t6, t7, X1, curve->p, num_words); /* t6 = x3' - B */ + uECC_vli_modMult_fast(t6, t6, t5, curve); /* t6 = (y2+y1)*(x3' - B) */ + uECC_vli_modSub(Y1, t6, Y1, curve->p, + num_words); /* t2 = (y2+y1)*(x3' - B) - E = y3' */ + + uECC_vli_set(X1, t7, num_words); +} + +/* result may overlap point. */ +static void EccPoint_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, + const uECC_word_t *initial_Z, bitcount_t num_bits, + uECC_Curve curve) { + /* R0 and R1 */ + uECC_word_t Rx[2][uECC_MAX_WORDS]; + uECC_word_t Ry[2][uECC_MAX_WORDS]; + uECC_word_t z[uECC_MAX_WORDS]; + bitcount_t i; + uECC_word_t nb; + wordcount_t num_words = curve->num_words; + + uECC_vli_set(Rx[1], point, num_words); + uECC_vli_set(Ry[1], point + num_words, num_words); + + XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], initial_Z, curve); + + for (i = num_bits - 2; i > 0; --i) { + nb = !uECC_vli_testBit(scalar, i); + XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve); + XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve); + } + + nb = !uECC_vli_testBit(scalar, 0); + XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve); + + /* Find final 1/Z value. 
*/ + uECC_vli_modSub(z, Rx[1], Rx[0], curve->p, num_words); /* X1 - X0 */ + uECC_vli_modMult_fast(z, z, Ry[1 - nb], curve); /* Yb * (X1 - X0) */ + uECC_vli_modMult_fast(z, z, point, curve); /* xP * Yb * (X1 - X0) */ + uECC_vli_modInv(z, z, curve->p, num_words); /* 1 / (xP * Yb * (X1 - X0)) */ + /* yP / (xP * Yb * (X1 - X0)) */ + uECC_vli_modMult_fast(z, z, point + num_words, curve); + uECC_vli_modMult_fast(z, z, Rx[1 - nb], + curve); /* Xb * yP / (xP * Yb * (X1 - X0)) */ + /* End 1/Z calculation */ + + XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve); + apply_z(Rx[0], Ry[0], z, curve); + + uECC_vli_set(result, Rx[0], num_words); + uECC_vli_set(result + num_words, Ry[0], num_words); +} + +static uECC_word_t regularize_k(const uECC_word_t *const k, uECC_word_t *k0, + uECC_word_t *k1, uECC_Curve curve) { + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t num_n_bits = curve->num_n_bits; + uECC_word_t carry = + uECC_vli_add(k0, k, curve->n, num_n_words) || + (num_n_bits < ((bitcount_t) num_n_words * uECC_WORD_SIZE * 8) && + uECC_vli_testBit(k0, num_n_bits)); + uECC_vli_add(k1, k0, curve->n, num_n_words); + return carry; +} + +/* Generates a random integer in the range 0 < random < top. + Both random and top have num_words words. 
*/ +uECC_VLI_API int uECC_generate_random_int(uECC_word_t *random, + const uECC_word_t *top, + wordcount_t num_words) { + uECC_word_t mask = (uECC_word_t) -1; + uECC_word_t tries; + bitcount_t num_bits = uECC_vli_numBits(top, num_words); + + if (!g_rng_function) { + return 0; + } + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!g_rng_function((uint8_t *) random, + (unsigned int) (num_words * uECC_WORD_SIZE))) { + return 0; + } + random[num_words - 1] &= + mask >> ((bitcount_t) (num_words * uECC_WORD_SIZE * 8 - num_bits)); + if (!uECC_vli_isZero(random, num_words) && + uECC_vli_cmp(top, random, num_words) == 1) { + return 1; + } + } + return 0; +} + +static uECC_word_t EccPoint_compute_public_key(uECC_word_t *result, + uECC_word_t *private_key, + uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {tmp1, tmp2}; + uECC_word_t *initial_Z = 0; + uECC_word_t carry; + + /* Regularize the bitcount for the private key so that attackers cannot use a + side channel attack to learn the number of leading zeros. */ + carry = regularize_k(private_key, tmp1, tmp2, curve); + + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. 
*/ + if (g_rng_function) { + if (!uECC_generate_random_int(p2[carry], curve->p, curve->num_words)) { + return 0; + } + initial_Z = p2[carry]; + } + EccPoint_mult(result, curve->G, p2[!carry], initial_Z, + (bitcount_t) (curve->num_n_bits + 1), curve); + + if (EccPoint_isZero(result, curve)) { + return 0; + } + return 1; +} + +#if uECC_WORD_SIZE == 1 + +uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uint8_t *native) { + wordcount_t i; + for (i = 0; i < num_bytes; ++i) { + bytes[i] = native[(num_bytes - 1) - i]; + } +} + +uECC_VLI_API void uECC_vli_bytesToNative(uint8_t *native, const uint8_t *bytes, + int num_bytes) { + uECC_vli_nativeToBytes(native, num_bytes, bytes); +} + +#else + +uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uECC_word_t *native) { + int i; + for (i = 0; i < num_bytes; ++i) { + unsigned b = (unsigned) (num_bytes - 1 - i); + bytes[i] = + (uint8_t) (native[b / uECC_WORD_SIZE] >> (8 * (b % uECC_WORD_SIZE))); + } +} + +uECC_VLI_API void uECC_vli_bytesToNative(uECC_word_t *native, + const uint8_t *bytes, int num_bytes) { + int i; + uECC_vli_clear(native, (wordcount_t) ((num_bytes + (uECC_WORD_SIZE - 1)) / + uECC_WORD_SIZE)); + for (i = 0; i < num_bytes; ++i) { + unsigned b = (unsigned) (num_bytes - 1 - i); + native[b / uECC_WORD_SIZE] |= (uECC_word_t) bytes[i] + << (8 * (b % uECC_WORD_SIZE)); + } +} + +#endif /* uECC_WORD_SIZE */ + +int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_private = (uECC_word_t *) private_key; + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _private[uECC_MAX_WORDS]; + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t tries; + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!uECC_generate_random_int(_private, curve->n, + BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (EccPoint_compute_public_key(_public, _private, 
curve)) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(private_key, BITS_TO_BYTES(curve->num_n_bits), + _private); + uECC_vli_nativeToBytes(public_key, curve->num_bytes, _public); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, + _public + curve->num_words); +#endif + return 1; + } + } + return 0; +} + +int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, + uint8_t *secret, uECC_Curve curve) { + uECC_word_t _public[uECC_MAX_WORDS * 2]; + uECC_word_t _private[uECC_MAX_WORDS]; + + uECC_word_t tmp[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {_private, tmp}; + uECC_word_t *initial_Z = 0; + uECC_word_t carry; + wordcount_t num_words = curve->num_words; + wordcount_t num_bytes = curve->num_bytes; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) _private, private_key, num_bytes); + bcopy((uint8_t *) _public, public_key, num_bytes * 2); +#else + uECC_vli_bytesToNative(_private, private_key, + BITS_TO_BYTES(curve->num_n_bits)); + uECC_vli_bytesToNative(_public, public_key, num_bytes); + uECC_vli_bytesToNative(_public + num_words, public_key + num_bytes, + num_bytes); +#endif + + /* Regularize the bitcount for the private key so that attackers cannot use a + side channel attack to learn the number of leading zeros. */ + carry = regularize_k(_private, _private, tmp, curve); + + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. 
*/ + if (g_rng_function) { + if (!uECC_generate_random_int(p2[carry], curve->p, num_words)) { + return 0; + } + initial_Z = p2[carry]; + } + + EccPoint_mult(_public, _public, p2[!carry], initial_Z, + (bitcount_t) (curve->num_n_bits + 1), curve); +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) secret, (uint8_t *) _public, num_bytes); +#else + uECC_vli_nativeToBytes(secret, num_bytes, _public); +#endif + return !EccPoint_isZero(_public, curve); +} + +#if uECC_SUPPORT_COMPRESSED_POINT +void uECC_compress(const uint8_t *public_key, uint8_t *compressed, + uECC_Curve curve) { + wordcount_t i; + for (i = 0; i < curve->num_bytes; ++i) { + compressed[i + 1] = public_key[i]; + } +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + compressed[0] = 2 + (public_key[curve->num_bytes] & 0x01); +#else + compressed[0] = 2 + (public_key[curve->num_bytes * 2 - 1] & 0x01); +#endif +} + +void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, + uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *point = (uECC_word_t *) public_key; +#else + uECC_word_t point[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t *y = point + curve->num_words; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy(public_key, compressed + 1, curve->num_bytes); +#else + uECC_vli_bytesToNative(point, compressed + 1, curve->num_bytes); +#endif + curve->x_side(y, point, curve); + curve->mod_sqrt(y, curve); + + if ((uint8_t) (y[0] & 0x01) != (compressed[0] & 0x01)) { + uECC_vli_sub(y, curve->p, y, curve->num_words); + } + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(public_key, curve->num_bytes, point); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, y); +#endif +} +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +uECC_VLI_API int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + /* The point at infinity is invalid. 
*/ + if (EccPoint_isZero(point, curve)) { + return 0; + } + + /* x and y must be smaller than p. */ + if (uECC_vli_cmp_unsafe(curve->p, point, num_words) != 1 || + uECC_vli_cmp_unsafe(curve->p, point + num_words, num_words) != 1) { + return 0; + } + + uECC_vli_modSquare_fast(tmp1, point + num_words, curve); + curve->x_side(tmp2, point, curve); /* tmp2 = x^3 + ax + b */ + + /* Make sure that y^2 == x^3 + ax + b */ + return (int) (uECC_vli_equal(tmp1, tmp2, num_words)); +} + +int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_bytesToNative(_public, public_key, curve->num_bytes); + uECC_vli_bytesToNative(_public + curve->num_words, + public_key + curve->num_bytes, curve->num_bytes); +#endif + return uECC_valid_point(_public, curve); +} + +int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, + uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_private = (uECC_word_t *) private_key; + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _private[uECC_MAX_WORDS]; + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_bytesToNative(_private, private_key, + BITS_TO_BYTES(curve->num_n_bits)); +#endif + + /* Make sure the private key is in the range [1, n-1]. */ + if (uECC_vli_isZero(_private, BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (uECC_vli_cmp(curve->n, _private, BITS_TO_WORDS(curve->num_n_bits)) != 1) { + return 0; + } + + /* Compute public key. 
*/ + if (!EccPoint_compute_public_key(_public, _private, curve)) { + return 0; + } + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(public_key, curve->num_bytes, _public); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, + _public + curve->num_words); +#endif + return 1; +} + +/* -------- ECDSA code -------- */ + +static void bits2int(uECC_word_t *native, const uint8_t *bits, + unsigned bits_size, uECC_Curve curve) { + unsigned num_n_bytes = (unsigned) BITS_TO_BYTES(curve->num_n_bits); + unsigned num_n_words = (unsigned) BITS_TO_WORDS(curve->num_n_bits); + int shift; + uECC_word_t carry; + uECC_word_t *ptr; + + if (bits_size > num_n_bytes) { + bits_size = num_n_bytes; + } + + uECC_vli_clear(native, (wordcount_t) num_n_words); +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) native, bits, bits_size); +#else + uECC_vli_bytesToNative(native, bits, (int) bits_size); +#endif + if (bits_size * 8 <= (unsigned) curve->num_n_bits) { + return; + } + shift = (int) bits_size * 8 - curve->num_n_bits; + carry = 0; + ptr = native + num_n_words; + while (ptr-- > native) { + uECC_word_t temp = *ptr; + *ptr = (temp >> shift) | carry; + carry = temp << (uECC_WORD_BITS - shift); + } + + /* Reduce mod curve_n */ + if (uECC_vli_cmp_unsafe(curve->n, native, (wordcount_t) num_n_words) != 1) { + uECC_vli_sub(native, native, curve->n, (wordcount_t) num_n_words); + } +} + +static int uECC_sign_with_k_internal(const uint8_t *private_key, + const uint8_t *message_hash, + unsigned hash_size, uECC_word_t *k, + uint8_t *signature, uECC_Curve curve) { + uECC_word_t tmp[uECC_MAX_WORDS]; + uECC_word_t s[uECC_MAX_WORDS]; + uECC_word_t *k2[2] = {tmp, s}; + uECC_word_t *initial_Z = 0; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *p = (uECC_word_t *) signature; +#else + uECC_word_t p[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t carry; + wordcount_t num_words = curve->num_words; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t 
num_n_bits = curve->num_n_bits; + + /* Make sure 0 < k < curve_n */ + if (uECC_vli_isZero(k, num_words) || + uECC_vli_cmp(curve->n, k, num_n_words) != 1) { + return 0; + } + + carry = regularize_k(k, tmp, s, curve); + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. */ + if (g_rng_function) { + if (!uECC_generate_random_int(k2[carry], curve->p, num_words)) { + return 0; + } + initial_Z = k2[carry]; + } + EccPoint_mult(p, curve->G, k2[!carry], initial_Z, + (bitcount_t) (num_n_bits + 1), curve); + if (uECC_vli_isZero(p, num_words)) { + return 0; + } + + /* If an RNG function was specified, get a random number + to prevent side channel analysis of k. */ + if (!g_rng_function) { + uECC_vli_clear(tmp, num_n_words); + tmp[0] = 1; + } else if (!uECC_generate_random_int(tmp, curve->n, num_n_words)) { + return 0; + } + + /* Prevent side channel analysis of uECC_vli_modInv() to determine + bits of k / the private key by premultiplying by a random number */ + uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k' = rand * k */ + uECC_vli_modInv(k, k, curve->n, num_n_words); /* k = 1 / k' */ + uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k = 1 / k */ + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(signature, curve->num_bytes, p); /* store r */ +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) tmp, private_key, BITS_TO_BYTES(curve->num_n_bits)); +#else + uECC_vli_bytesToNative(tmp, private_key, + BITS_TO_BYTES(curve->num_n_bits)); /* tmp = d */ +#endif + + s[num_n_words - 1] = 0; + uECC_vli_set(s, p, num_words); + uECC_vli_modMult(s, tmp, s, curve->n, num_n_words); /* s = r*d */ + + bits2int(tmp, message_hash, hash_size, curve); + uECC_vli_modAdd(s, tmp, s, curve->n, num_n_words); /* s = e + r*d */ + uECC_vli_modMult(s, s, k, curve->n, num_n_words); /* s = (e + r*d) / k */ + if (uECC_vli_numBits(s, num_n_words) > (bitcount_t) curve->num_bytes * 8) { + 
return 0; + } +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) signature + curve->num_bytes, (uint8_t *) s, + curve->num_bytes); +#else + uECC_vli_nativeToBytes(signature + curve->num_bytes, curve->num_bytes, s); +#endif + return 1; +} + +#if 0 +/* For testing - sign with an explicitly specified k value */ +int uECC_sign_with_k(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *k, uint8_t *signature, + uECC_Curve curve) { + uECC_word_t k2[uECC_MAX_WORDS]; + bits2int(k2, k, (unsigned) BITS_TO_BYTES(curve->num_n_bits), curve); + return uECC_sign_with_k_internal(private_key, message_hash, hash_size, k2, + signature, curve); +} +#endif + +int uECC_sign(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, uint8_t *signature, uECC_Curve curve) { + uECC_word_t k[uECC_MAX_WORDS]; + uECC_word_t tries; + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!uECC_generate_random_int(k, curve->n, + BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (uECC_sign_with_k_internal(private_key, message_hash, hash_size, k, + signature, curve)) { + return 1; + } + } + return 0; +} + +/* Compute an HMAC using K as a key (as in RFC 6979). Note that K is always + the same size as the hash result size. 
*/ +static void HMAC_init(const uECC_HashContext *hash_context, const uint8_t *K) { + uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x36; + for (; i < hash_context->block_size; ++i) pad[i] = 0x36; + + hash_context->init_hash(hash_context); + hash_context->update_hash(hash_context, pad, hash_context->block_size); +} + +static void HMAC_update(const uECC_HashContext *hash_context, + const uint8_t *message, unsigned message_size) { + hash_context->update_hash(hash_context, message, message_size); +} + +static void HMAC_finish(const uECC_HashContext *hash_context, const uint8_t *K, + uint8_t *result) { + uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x5c; + for (; i < hash_context->block_size; ++i) pad[i] = 0x5c; + + hash_context->finish_hash(hash_context, result); + + hash_context->init_hash(hash_context); + hash_context->update_hash(hash_context, pad, hash_context->block_size); + hash_context->update_hash(hash_context, result, hash_context->result_size); + hash_context->finish_hash(hash_context, result); +} + +/* V = HMAC_K(V) */ +static void update_V(const uECC_HashContext *hash_context, uint8_t *K, + uint8_t *V) { + HMAC_init(hash_context, K); + HMAC_update(hash_context, V, hash_context->result_size); + HMAC_finish(hash_context, K, V); +} + +/* Deterministic signing, similar to RFC 6979. Differences are: + * We just use H(m) directly rather than bits2octets(H(m)) + (it is not reduced modulo curve_n). + * We generate a value for k (aka T) directly rather than converting + endianness. 
+ + Layout of hash_context->tmp: | | (1 byte overlapped 0x00 or 0x01) / + */ +int uECC_sign_deterministic(const uint8_t *private_key, + const uint8_t *message_hash, unsigned hash_size, + const uECC_HashContext *hash_context, + uint8_t *signature, uECC_Curve curve) { + uint8_t *K = hash_context->tmp; + uint8_t *V = K + hash_context->result_size; + wordcount_t num_bytes = curve->num_bytes; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t num_n_bits = curve->num_n_bits; + uECC_word_t tries; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) { + V[i] = 0x01; + K[i] = 0; + } + + /* K = HMAC_K(V || 0x00 || int2octets(x) || h(m)) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x00; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_update(hash_context, private_key, (unsigned int) num_bytes); + HMAC_update(hash_context, message_hash, hash_size); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + + /* K = HMAC_K(V || 0x01 || int2octets(x) || h(m)) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x01; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_update(hash_context, private_key, (unsigned int) num_bytes); + HMAC_update(hash_context, message_hash, hash_size); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + uECC_word_t T[uECC_MAX_WORDS]; + uint8_t *T_ptr = (uint8_t *) T; + wordcount_t T_bytes = 0; + for (;;) { + update_V(hash_context, K, V); + for (i = 0; i < hash_context->result_size; ++i) { + T_ptr[T_bytes++] = V[i]; + if (T_bytes >= num_n_words * uECC_WORD_SIZE) { + goto filled; + } + } + } + filled: + if ((bitcount_t) num_n_words * uECC_WORD_SIZE * 8 > num_n_bits) { + uECC_word_t mask = (uECC_word_t) -1; + T[num_n_words - 1] &= + mask >> + ((bitcount_t) (num_n_words * uECC_WORD_SIZE * 8 - num_n_bits)); + } + + if (uECC_sign_with_k_internal(private_key, 
message_hash, hash_size, T, + signature, curve)) { + return 1; + } + + /* K = HMAC_K(V || 0x00) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x00; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + } + return 0; +} + +static bitcount_t smax(bitcount_t a, bitcount_t b) { + return (a > b ? a : b); +} + +int uECC_verify(const uint8_t *public_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *signature, + uECC_Curve curve) { + uECC_word_t u1[uECC_MAX_WORDS], u2[uECC_MAX_WORDS]; + uECC_word_t z[uECC_MAX_WORDS]; + uECC_word_t sum[uECC_MAX_WORDS * 2]; + uECC_word_t rx[uECC_MAX_WORDS]; + uECC_word_t ry[uECC_MAX_WORDS]; + uECC_word_t tx[uECC_MAX_WORDS]; + uECC_word_t ty[uECC_MAX_WORDS]; + uECC_word_t tz[uECC_MAX_WORDS]; + const uECC_word_t *points[4]; + const uECC_word_t *point; + bitcount_t num_bits; + bitcount_t i; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t r[uECC_MAX_WORDS], s[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + + rx[num_n_words - 1] = 0; + r[num_n_words - 1] = 0; + s[num_n_words - 1] = 0; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) r, signature, curve->num_bytes); + bcopy((uint8_t *) s, signature + curve->num_bytes, curve->num_bytes); +#else + uECC_vli_bytesToNative(_public, public_key, curve->num_bytes); + uECC_vli_bytesToNative(_public + num_words, public_key + curve->num_bytes, + curve->num_bytes); + uECC_vli_bytesToNative(r, signature, curve->num_bytes); + uECC_vli_bytesToNative(s, signature + curve->num_bytes, curve->num_bytes); +#endif + + /* r, s must not be 0. */ + if (uECC_vli_isZero(r, num_words) || uECC_vli_isZero(s, num_words)) { + return 0; + } + + /* r, s must be < n. 
*/ + if (uECC_vli_cmp_unsafe(curve->n, r, num_n_words) != 1 || + uECC_vli_cmp_unsafe(curve->n, s, num_n_words) != 1) { + return 0; + } + + /* Calculate u1 and u2. */ + uECC_vli_modInv(z, s, curve->n, num_n_words); /* z = 1/s */ + u1[num_n_words - 1] = 0; + bits2int(u1, message_hash, hash_size, curve); + uECC_vli_modMult(u1, u1, z, curve->n, num_n_words); /* u1 = e/s */ + uECC_vli_modMult(u2, r, z, curve->n, num_n_words); /* u2 = r/s */ + + /* Calculate sum = G + Q. */ + uECC_vli_set(sum, _public, num_words); + uECC_vli_set(sum + num_words, _public + num_words, num_words); + uECC_vli_set(tx, curve->G, num_words); + uECC_vli_set(ty, curve->G + num_words, num_words); + uECC_vli_modSub(z, sum, tx, curve->p, num_words); /* z = x2 - x1 */ + XYcZ_add(tx, ty, sum, sum + num_words, curve); + uECC_vli_modInv(z, z, curve->p, num_words); /* z = 1/z */ + apply_z(sum, sum + num_words, z, curve); + + /* Use Shamir's trick to calculate u1*G + u2*Q */ + points[0] = 0; + points[1] = curve->G; + points[2] = _public; + points[3] = sum; + num_bits = smax(uECC_vli_numBits(u1, num_n_words), + uECC_vli_numBits(u2, num_n_words)); + point = points[(!!uECC_vli_testBit(u1, (bitcount_t) (num_bits - 1))) | + ((!!uECC_vli_testBit(u2, (bitcount_t) (num_bits - 1))) << 1)]; + uECC_vli_set(rx, point, num_words); + uECC_vli_set(ry, point + num_words, num_words); + uECC_vli_clear(z, num_words); + z[0] = 1; + + for (i = num_bits - 2; i >= 0; --i) { + uECC_word_t index; + curve->double_jacobian(rx, ry, z, curve); + + index = (!!uECC_vli_testBit(u1, i)) | + (uECC_word_t) ((!!uECC_vli_testBit(u2, i)) << 1); + point = points[index]; + if (point) { + uECC_vli_set(tx, point, num_words); + uECC_vli_set(ty, point + num_words, num_words); + apply_z(tx, ty, z, curve); + uECC_vli_modSub(tz, rx, tx, curve->p, num_words); /* Z = x2 - x1 */ + XYcZ_add(tx, ty, rx, ry, curve); + uECC_vli_modMult_fast(z, z, tz, curve); + } + } + + uECC_vli_modInv(z, z, curve->p, num_words); /* Z = 1/Z */ + apply_z(rx, ry, z, curve); + 
+ /* v = x1 (mod n) */ + if (uECC_vli_cmp_unsafe(curve->n, rx, num_n_words) != 1) { + uECC_vli_sub(rx, rx, curve->n, num_n_words); + } + + /* Accept only if v == r. */ + return (int) (uECC_vli_equal(rx, r, num_words)); +} + +#if uECC_ENABLE_VLI_API + +unsigned uECC_curve_num_words(uECC_Curve curve) { + return curve->num_words; +} + +unsigned uECC_curve_num_bytes(uECC_Curve curve) { + return curve->num_bytes; +} + +unsigned uECC_curve_num_bits(uECC_Curve curve) { + return curve->num_bytes * 8; +} + +unsigned uECC_curve_num_n_words(uECC_Curve curve) { + return BITS_TO_WORDS(curve->num_n_bits); +} + +unsigned uECC_curve_num_n_bytes(uECC_Curve curve) { + return BITS_TO_BYTES(curve->num_n_bits); +} + +unsigned uECC_curve_num_n_bits(uECC_Curve curve) { + return curve->num_n_bits; +} + +const uECC_word_t *uECC_curve_p(uECC_Curve curve) { + return curve->p; +} + +const uECC_word_t *uECC_curve_n(uECC_Curve curve) { + return curve->n; +} + +const uECC_word_t *uECC_curve_G(uECC_Curve curve) { + return curve->G; +} + +const uECC_word_t *uECC_curve_b(uECC_Curve curve) { + return curve->b; +} + +#if uECC_SUPPORT_COMPRESSED_POINT +void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve) { + curve->mod_sqrt(a, curve); +} +#endif + +void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, + uECC_Curve curve) { +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} + +void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {tmp1, tmp2}; + uECC_word_t carry = regularize_k(scalar, tmp1, tmp2, curve); + + EccPoint_mult(result, point, p2[!carry], 0, curve->num_n_bits + 1, curve); +} + +#endif /* uECC_ENABLE_VLI_API */ +#endif // MG_TLS_BUILTIN + #ifdef MG_ENABLE_LINES #line 1 "src/url.c" #endif diff --git a/mongoose.h 
b/mongoose.h index b3b2c225..203b78a6 100644 --- a/mongoose.h +++ b/mongoose.h @@ -1175,6 +1175,923 @@ void mg_sha1_update(mg_sha1_ctx *, const unsigned char *data, size_t len); void mg_sha1_final(unsigned char digest[20], mg_sha1_ctx *); + + +typedef struct { + uint32_t state[8]; + uint64_t bits; + uint32_t len; + unsigned char buffer[64]; +} mg_sha256_ctx; + +void mg_sha256_init(mg_sha256_ctx *); +void mg_sha256_update(mg_sha256_ctx *, const unsigned char *data, size_t len); +void mg_sha256_final(unsigned char digest[32], mg_sha256_ctx *); +void mg_hmac_sha256(uint8_t dst[32], uint8_t *key, size_t keysz, uint8_t *data, + size_t datasz); +/****************************************************************************** + * + * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL + * + * This is a simple and straightforward implementation of the AES Rijndael + * 128-bit block cipher designed by Vincent Rijmen and Joan Daemen. The focus + * of this work was correctness & accuracy. It is written in 'C' without any + * particular focus upon optimization or speed. It should be endian (memory + * byte order) neutral since the few places that care are handled explicitly. + * + * This implementation of Rijndael was created by Steven M. Gibson of GRC.com. + * + * It is intended for general purpose use, but was written in support of GRC's + * reference implementation of the SQRL (Secure Quick Reliable Login) client. + * + * See: http://csrc.nist.gov/archive/aes/rijndael/wsdindex.html + * + * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE + * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK. 
+ * + *******************************************************************************/ + +#ifndef AES_HEADER +#define AES_HEADER + +/******************************************************************************/ +#define AES_DECRYPTION 1 // whether AES decryption is supported +/******************************************************************************/ + +#define ENCRYPT 1 // specify whether we're encrypting +#define DECRYPT 0 // or decrypting + + + +typedef unsigned char uchar; // add some convienent shorter types +typedef unsigned int uint; + +/****************************************************************************** + * AES_INIT_KEYGEN_TABLES : MUST be called once before any AES use + ******************************************************************************/ +void aes_init_keygen_tables(void); + +/****************************************************************************** + * AES_CONTEXT : cipher context / holds inter-call data + ******************************************************************************/ +typedef struct { + int mode; // 1 for Encryption, 0 for Decryption + int rounds; // keysize-based rounds count + uint32_t *rk; // pointer to current round key + uint32_t buf[68]; // key expansion buffer +} aes_context; + +/****************************************************************************** + * AES_SETKEY : called to expand the key for encryption or decryption + ******************************************************************************/ +int aes_setkey(aes_context *ctx, // pointer to context + int mode, // 1 or 0 for Encrypt/Decrypt + const uchar *key, // AES input key + uint keysize); // size in bytes (must be 16, 24, 32 for + // 128, 192 or 256-bit keys respectively) + // returns 0 for success + +/****************************************************************************** + * AES_CIPHER : called to encrypt or decrypt ONE 128-bit block of data + 
******************************************************************************/ +int aes_cipher(aes_context *ctx, // pointer to context + const uchar input[16], // 128-bit block to en/decipher + uchar output[16]); // 128-bit output result block + // returns 0 for success + +#endif /* AES_HEADER */ +/****************************************************************************** + * + * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL + * + * This is a simple and straightforward implementation of AES-GCM authenticated + * encryption. The focus of this work was correctness & accuracy. It is written + * in straight 'C' without any particular focus upon optimization or speed. It + * should be endian (memory byte order) neutral since the few places that care + * are handled explicitly. + * + * This implementation of AES-GCM was created by Steven M. Gibson of GRC.com. + * + * It is intended for general purpose use, but was written in support of GRC's + * reference implementation of the SQRL (Secure Quick Reliable Login) client. + * + * See: http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf + * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ \ + * gcm/gcm-revised-spec.pdf + * + * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE + * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK. 
+ * + *******************************************************************************/ +#ifndef GCM_HEADER +#define GCM_HEADER + + +#define GCM_AUTH_FAILURE 0x55555555 // authentication failure + +/****************************************************************************** + * GCM_CONTEXT : GCM context / holds keytables, instance data, and AES ctx + ******************************************************************************/ +typedef struct { + int mode; // cipher direction: encrypt/decrypt + uint64_t len; // cipher data length processed so far + uint64_t add_len; // total add data length + uint64_t HL[16]; // precalculated lo-half HTable + uint64_t HH[16]; // precalculated hi-half HTable + uchar base_ectr[16]; // first counter-mode cipher output for tag + uchar y[16]; // the current cipher-input IV|Counter value + uchar buf[16]; // buf working value + aes_context aes_ctx; // cipher context used +} gcm_context; + +/****************************************************************************** + * GCM_INITIALIZE : MUST be called once before ANY use of this library + ******************************************************************************/ +int gcm_initialize(void); + +/****************************************************************************** + * GCM_SETKEY : sets the GCM (and AES) keying material for use + ******************************************************************************/ +int gcm_setkey(gcm_context *ctx, // caller-provided context ptr + const uchar *key, // pointer to cipher key + const uint keysize // size in bytes (must be 16, 24, 32 for + // 128, 192 or 256-bit keys respectively) +); // returns 0 for success + +/****************************************************************************** + * + * GCM_CRYPT_AND_TAG + * + * This either encrypts or decrypts the user-provided data and, either + * way, generates an authentication tag of the requested length. 
It must be + * called with a GCM context whose key has already been set with GCM_SETKEY. + * + * The user would typically call this explicitly to ENCRYPT a buffer of data + * and optional associated data, and produce its authentication tag. + * + * To reverse the process the user would typically call the companion + * GCM_AUTH_DECRYPT function to decrypt data and verify a user-provided + * authentication tag. The GCM_AUTH_DECRYPT function calls this function + * to perform its decryption and tag generation, which it then compares. + * + ******************************************************************************/ +int gcm_crypt_and_tag( + gcm_context *ctx, // gcm context with key already setup + int mode, // cipher direction: ENCRYPT (1) or DECRYPT (0) + const uchar *iv, // pointer to the 12-byte initialization vector + size_t iv_len, // byte length of the IV. should always be 12 + const uchar *add, // pointer to the non-ciphered additional data + size_t add_len, // byte length of the additional AEAD data + const uchar *input, // pointer to the cipher data source + uchar *output, // pointer to the cipher data destination + size_t length, // byte length of the cipher data + uchar *tag, // pointer to the tag to be generated + size_t tag_len); // byte length of the tag to be generated + +/****************************************************************************** + * + * GCM_AUTH_DECRYPT + * + * This DECRYPTS a user-provided data buffer with optional associated data. + * It then verifies a user-supplied authentication tag against the tag just + * re-created during decryption to verify that the data has not been altered. + * + * This function calls GCM_CRYPT_AND_TAG (above) to perform the decryption + * and authentication tag generation. 
+ * + ******************************************************************************/ +int gcm_auth_decrypt( + gcm_context *ctx, // gcm context with key already setup + const uchar *iv, // pointer to the 12-byte initialization vector + size_t iv_len, // byte length of the IV. should always be 12 + const uchar *add, // pointer to the non-ciphered additional data + size_t add_len, // byte length of the additional AEAD data + const uchar *input, // pointer to the cipher data source + uchar *output, // pointer to the cipher data destination + size_t length, // byte length of the cipher data + const uchar *tag, // pointer to the tag to be authenticated + size_t tag_len); // byte length of the tag <= 16 + +/****************************************************************************** + * + * GCM_START + * + * Given a user-provided GCM context, this initializes it, sets the encryption + * mode, and preprocesses the initialization vector and additional AEAD data. + * + ******************************************************************************/ +int gcm_start( + gcm_context *ctx, // pointer to user-provided GCM context + int mode, // ENCRYPT (1) or DECRYPT (0) + const uchar *iv, // pointer to initialization vector + size_t iv_len, // IV length in bytes (should == 12) + const uchar *add, // pointer to additional AEAD data (NULL if none) + size_t add_len); // length of additional AEAD data (bytes) + +/****************************************************************************** + * + * GCM_UPDATE + * + * This is called once or more to process bulk plaintext or ciphertext data. + * We give this some number of bytes of input and it returns the same number + * of output bytes. If called multiple times (which is fine) all but the final + * invocation MUST be called with length mod 16 == 0. (Only the final call can + * have a partial block length of < 128 bits.) 
+ * + ******************************************************************************/ +int gcm_update(gcm_context *ctx, // pointer to user-provided GCM context + size_t length, // length, in bytes, of data to process + const uchar *input, // pointer to source data + uchar *output); // pointer to destination data + +/****************************************************************************** + * + * GCM_FINISH + * + * This is called once after all calls to GCM_UPDATE to finalize the GCM. + * It performs the final GHASH to produce the resulting authentication TAG. + * + ******************************************************************************/ +int gcm_finish(gcm_context *ctx, // pointer to user-provided GCM context + uchar *tag, // ptr to tag buffer - NULL if tag_len = 0 + size_t tag_len); // length, in bytes, of the tag-receiving buf + +/****************************************************************************** + * + * GCM_ZERO_CTX + * + * The GCM context contains both the GCM context and the AES context. + * This includes keying and key-related material which is security- + * sensitive, so it MUST be zeroed after use. This function does that. + * + ******************************************************************************/ +void gcm_zero_ctx(gcm_context *ctx); + +#endif /* GCM_HEADER */ +// +// aes-gcm.h +// MKo +// +// Created by Markus Kosmal on 20/11/14. 
+// +// + +#ifndef mko_aes_gcm_h +#define mko_aes_gcm_h + +int aes_gcm_encrypt(unsigned char *output, const unsigned char *input, + size_t input_length, const unsigned char *key, + const size_t key_len, const unsigned char *iv, + const size_t iv_len, unsigned char *aead, size_t aead_len, + unsigned char *tag, const size_t tag_len); + +int aes_gcm_decrypt(unsigned char *output, const unsigned char *input, + size_t input_length, const unsigned char *key, + const size_t key_len, const unsigned char *iv, + const size_t iv_len); + +#endif + + + + +#define uECC_SUPPORTS_secp256r1 1 +/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#ifndef _UECC_H_ +#define _UECC_H_ + +/* Platform selection options. +If uECC_PLATFORM is not defined, the code will try to guess it based on compiler +macros. Possible values for uECC_PLATFORM are defined below: */ +#define uECC_arch_other 0 +#define uECC_x86 1 +#define uECC_x86_64 2 +#define uECC_arm 3 +#define uECC_arm_thumb 4 +#define uECC_arm_thumb2 5 +#define uECC_arm64 6 +#define uECC_avr 7 + +/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform +(1, 4, or 8 bytes). If uECC_WORD_SIZE is not explicitly defined then it will be +automatically set based on your platform. */ + +/* Optimization level; trade speed for code size. + Larger values produce code that is faster but larger. + Currently supported values are 0 - 4; 0 is unusably slow for most + applications. Optimization level 4 currently only has an effect on ARM platforms + where more than one curve is enabled. */ +#ifndef uECC_OPTIMIZATION_LEVEL +#define uECC_OPTIMIZATION_LEVEL 2 +#endif + +/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a +specific function to be used for (scalar) squaring instead of the generic +multiplication function. This can make things somewhat faster, but +increases the code size. 
*/ +#ifndef uECC_SQUARE_FUNC +#define uECC_SQUARE_FUNC 0 +#endif + +/* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will +switch to native little-endian format for *all* arrays passed in and out of the +public API. This includes public and private keys, shared secrets, signatures +and message hashes. Using this switch reduces the amount of call stack memory +used by uECC, since less intermediate translations are required. Note that this +will *only* work on native little-endian processors and it will treat the +uint8_t arrays passed into the public API as word arrays, therefore requiring +the provided byte arrays to be word aligned on architectures that do not support +unaligned accesses. IMPORTANT: Keys and signatures generated with +uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible with keys and signatures +generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use the same +endianness. */ +#ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN +#define uECC_VLI_NATIVE_LITTLE_ENDIAN 0 +#endif + +/* Curve support selection. Set to 0 to remove that curve. */ +#ifndef uECC_SUPPORTS_secp160r1 +#define uECC_SUPPORTS_secp160r1 0 +#endif +#ifndef uECC_SUPPORTS_secp192r1 +#define uECC_SUPPORTS_secp192r1 0 +#endif +#ifndef uECC_SUPPORTS_secp224r1 +#define uECC_SUPPORTS_secp224r1 0 +#endif +#ifndef uECC_SUPPORTS_secp256r1 +#define uECC_SUPPORTS_secp256r1 1 +#endif +#ifndef uECC_SUPPORTS_secp256k1 +#define uECC_SUPPORTS_secp256k1 0 +#endif + +/* Specifies whether compressed point format is supported. + Set to 0 to disable point compression/decompression functions. 
*/ +#ifndef uECC_SUPPORT_COMPRESSED_POINT +#define uECC_SUPPORT_COMPRESSED_POINT 1 +#endif + +struct uECC_Curve_t; +typedef const struct uECC_Curve_t *uECC_Curve; + +#ifdef __cplusplus +extern "C" { +#endif + +#if uECC_SUPPORTS_secp160r1 +uECC_Curve uECC_secp160r1(void); +#endif +#if uECC_SUPPORTS_secp192r1 +uECC_Curve uECC_secp192r1(void); +#endif +#if uECC_SUPPORTS_secp224r1 +uECC_Curve uECC_secp224r1(void); +#endif +#if uECC_SUPPORTS_secp256r1 +uECC_Curve uECC_secp256r1(void); +#endif +#if uECC_SUPPORTS_secp256k1 +uECC_Curve uECC_secp256k1(void); +#endif + +/* uECC_RNG_Function type +The RNG function should fill 'size' random bytes into 'dest'. It should return 1 +if 'dest' was filled with random data, or 0 if the random data could not be +generated. The filled-in values should be either truly random, or from a +cryptographically-secure PRNG. + +A correctly functioning RNG function must be set (using uECC_set_rng()) before +calling uECC_make_key() or uECC_sign(). + +Setting a correctly functioning RNG function improves the resistance to +side-channel attacks for uECC_shared_secret() and uECC_sign_deterministic(). + +A correct RNG function is set by default when building for Windows, Linux, or OS +X. If you are building on another POSIX-compliant system that supports +/dev/random or /dev/urandom, you can define uECC_POSIX to use the predefined +RNG. For embedded platforms there is no predefined RNG function; you must +provide your own. +*/ +typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size); + +/* uECC_set_rng() function. +Set the function that will be used to generate random bytes. The RNG function +should return 1 if the random data was generated, or 0 if the random data could +not be generated. + +On platforms where there is no predefined RNG function (eg embedded platforms), +this must be called before uECC_make_key() or uECC_sign() are used. + +Inputs: + rng_function - The function that will be used to generate random bytes. 
+*/ +void uECC_set_rng(uECC_RNG_Function rng_function); + +/* uECC_get_rng() function. + +Returns the function that will be used to generate random bytes. +*/ +uECC_RNG_Function uECC_get_rng(void); + +/* uECC_curve_private_key_size() function. + +Returns the size of a private key for the curve in bytes. +*/ +int uECC_curve_private_key_size(uECC_Curve curve); + +/* uECC_curve_public_key_size() function. + +Returns the size of a public key for the curve in bytes. +*/ +int uECC_curve_public_key_size(uECC_Curve curve); + +/* uECC_make_key() function. +Create a public/private key pair. + +Outputs: + public_key - Will be filled in with the public key. Must be at least 2 * +the curve size (in bytes) long. For example, if the curve is secp256r1, +public_key must be 64 bytes long. private_key - Will be filled in with the +private key. Must be as long as the curve order; this is typically the same as +the curve size, except for secp160r1. For example, if the curve is secp256r1, +private_key must be 32 bytes long. + + For secp160r1, private_key must be 21 bytes long! Note that +the first byte will almost always be 0 (there is about a 1 in 2^80 chance of it +being non-zero). + +Returns 1 if the key pair was generated successfully, 0 if an error occurred. +*/ +int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve); + +/* uECC_shared_secret() function. +Compute a shared secret given your secret key and someone else's public key. If +the public key is not from a trusted source and has not been previously +verified, you should verify it first using uECC_valid_public_key(). Note: It is +recommended that you hash the result of uECC_shared_secret() before using it for +symmetric encryption or HMAC. + +Inputs: + public_key - The public key of the remote party. + private_key - Your private key. + +Outputs: + secret - Will be filled in with the shared secret value. 
Must be the same +size as the curve size; for example, if the curve is secp256r1, secret must be +32 bytes long. + +Returns 1 if the shared secret was generated successfully, 0 if an error +occurred. +*/ +int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, + uint8_t *secret, uECC_Curve curve); + +#if uECC_SUPPORT_COMPRESSED_POINT +/* uECC_compress() function. +Compress a public key. + +Inputs: + public_key - The public key to compress. + +Outputs: + compressed - Will be filled in with the compressed public key. Must be at +least (curve size + 1) bytes long; for example, if the curve is secp256r1, + compressed must be 33 bytes long. +*/ +void uECC_compress(const uint8_t *public_key, uint8_t *compressed, + uECC_Curve curve); + +/* uECC_decompress() function. +Decompress a compressed public key. + +Inputs: + compressed - The compressed public key. + +Outputs: + public_key - Will be filled in with the decompressed public key. +*/ +void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, + uECC_Curve curve); +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +/* uECC_valid_public_key() function. +Check to see if a public key is valid. + +Note that you are not required to check for a valid public key before using any +other uECC functions. However, you may wish to avoid spending CPU time computing +a shared secret or verifying a signature using an invalid public key. + +Inputs: + public_key - The public key to check. + +Returns 1 if the public key is valid, 0 if it is invalid. +*/ +int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve); + +/* uECC_compute_public_key() function. +Compute the corresponding public key for a private key. + +Inputs: + private_key - The private key to compute the public key for + +Outputs: + public_key - Will be filled in with the corresponding public key + +Returns 1 if the key was computed successfully, 0 if an error occurred. 
+*/ +int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, + uECC_Curve curve); + +/* uECC_sign() function. +Generate an ECDSA signature for a given hash value. + +Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and +pass it in to this function along with your private key. + +Inputs: + private_key - Your private key. + message_hash - The hash of the message to sign. + hash_size - The size of message_hash in bytes. + +Outputs: + signature - Will be filled in with the signature value. Must be at least 2 * +curve size long. For example, if the curve is secp256r1, signature must be 64 +bytes long. + +Returns 1 if the signature generated successfully, 0 if an error occurred. +*/ +int uECC_sign(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, uint8_t *signature, uECC_Curve curve); + +/* uECC_HashContext structure. +This is used to pass in an arbitrary hash function to uECC_sign_deterministic(). +The structure will be used for multiple hash computations; each time a new hash +is computed, init_hash() will be called, followed by one or more calls to +update_hash(), and finally a call to finish_hash() to produce the resulting +hash. + +The intention is that you will create a structure that includes uECC_HashContext +followed by any hash-specific data. For example: + +typedef struct SHA256_HashContext { + uECC_HashContext uECC; + SHA256_CTX ctx; +} SHA256_HashContext; + +void init_SHA256(uECC_HashContext *base) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Init(&context->ctx); +} + +void update_SHA256(uECC_HashContext *base, + const uint8_t *message, + unsigned message_size) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Update(&context->ctx, message, message_size); +} + +void finish_SHA256(uECC_HashContext *base, uint8_t *hash_result) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Final(hash_result, &context->ctx); +} + +... 
when signing ... +{ + uint8_t tmp[32 + 32 + 64]; + SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, +32, tmp}}; uECC_sign_deterministic(key, message_hash, hash_size, &ctx.uECC, signature, curve); +} +*/ +typedef struct uECC_HashContext { + void (*init_hash)(const struct uECC_HashContext *context); + void (*update_hash)(const struct uECC_HashContext *context, + const uint8_t *message, unsigned message_size); + void (*finish_hash)(const struct uECC_HashContext *context, + uint8_t *hash_result); + unsigned + block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */ + unsigned + result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */ + uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + + block_size) bytes. */ +} uECC_HashContext; + +/* uECC_sign_deterministic() function. +Generate an ECDSA signature for a given hash value, using a deterministic +algorithm (see RFC 6979). You do not need to set the RNG using uECC_set_rng() +before calling this function; however, if the RNG is defined it will improve +resistance to side-channel attacks. + +Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and +pass it to this function along with your private key and a hash context. Note +that the message_hash does not need to be computed with the same hash function +used by hash_context. + +Inputs: + private_key - Your private key. + message_hash - The hash of the message to sign. + hash_size - The size of message_hash in bytes. + hash_context - A hash context to use. + +Outputs: + signature - Will be filled in with the signature value. + +Returns 1 if the signature was generated successfully, 0 if an error occurred. +*/ +int uECC_sign_deterministic(const uint8_t *private_key, + const uint8_t *message_hash, unsigned hash_size, + const uECC_HashContext *hash_context, + uint8_t *signature, uECC_Curve curve); + +/* uECC_verify() function. +Verify an ECDSA signature. 
+ +Usage: Compute the hash of the signed data using the same hash as the signer and +pass it to this function along with the signer's public key and the signature +values (r and s). + +Inputs: + public_key - The signer's public key. + message_hash - The hash of the signed data. + hash_size - The size of message_hash in bytes. + signature - The signature value. + +Returns 1 if the signature is valid, 0 if it is invalid. +*/ +int uECC_verify(const uint8_t *public_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *signature, uECC_Curve curve); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* _UECC_H_ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#ifndef _UECC_VLI_H_ +#define _UECC_VLI_H_ + +// +// + +/* Functions for raw large-integer manipulation. These are only available + if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */ +#ifndef uECC_ENABLE_VLI_API +#define uECC_ENABLE_VLI_API 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if uECC_ENABLE_VLI_API + +void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words); + +/* Constant-time comparison to zero - secure way to compare long integers */ +/* Returns 1 if vli == 0, 0 otherwise. */ +uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words); + +/* Returns nonzero if bit 'bit' of vli is set. */ +uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit); + +/* Counts the number of bits required to represent vli. */ +bitcount_t uECC_vli_numBits(const uECC_word_t *vli, + const wordcount_t max_words); + +/* Sets dest = src. 
*/ +void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, + wordcount_t num_words); + +/* Constant-time comparison function - secure way to compare long integers */ +/* Returns one if left == right, zero otherwise */ +uECC_word_t uECC_vli_equal(const uECC_word_t *left, const uECC_word_t *right, + wordcount_t num_words); + +/* Constant-time comparison function - secure way to compare long integers */ +/* Returns sign of left - right, in constant time. */ +cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, + wordcount_t num_words); + +/* Computes vli = vli >> 1. */ +void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words); + +/* Computes result = left + right, returning carry. Can modify in place. */ +uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left - right, returning borrow. Can modify in place. */ +uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left * right. Result must be 2 * num_words long. */ +void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left^2. Result must be 2 * num_words long. */ +void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words); + +/* Computes result = (left + right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +void uECC_vli_modAdd(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = (left - right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. 
*/ +void uECC_vli_modSub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = product % mod, where product is 2N words long. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_mmod(uECC_word_t *result, uECC_word_t *product, + const uECC_word_t *mod, wordcount_t num_words); + +/* Calculates result = product (mod curve->p), where product is up to + 2 * curve->num_words long. */ +void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, + uECC_Curve curve); + +/* Computes result = (left * right) % mod. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = (left * right) % curve->p. */ +void uECC_vli_modMult_fast(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, uECC_Curve curve); + +/* Computes result = left^2 % mod. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_modSquare(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *mod, wordcount_t num_words); + +/* Computes result = left^2 % curve->p. */ +void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, + uECC_Curve curve); + +/* Computes result = (1 / input) % mod.*/ +void uECC_vli_modInv(uECC_word_t *result, const uECC_word_t *input, + const uECC_word_t *mod, wordcount_t num_words); + +#if uECC_SUPPORT_COMPRESSED_POINT +/* Calculates a = sqrt(a) (mod curve->p) */ +void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve); +#endif + +/* Converts an integer in uECC native format to big-endian bytes. */ +void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uECC_word_t *native); +/* Converts big-endian bytes to an integer in uECC native format. 
*/ +void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, + int num_bytes); + +unsigned uECC_curve_num_words(uECC_Curve curve); +unsigned uECC_curve_num_bytes(uECC_Curve curve); +unsigned uECC_curve_num_bits(uECC_Curve curve); +unsigned uECC_curve_num_n_words(uECC_Curve curve); +unsigned uECC_curve_num_n_bytes(uECC_Curve curve); +unsigned uECC_curve_num_n_bits(uECC_Curve curve); + +const uECC_word_t *uECC_curve_p(uECC_Curve curve); +const uECC_word_t *uECC_curve_n(uECC_Curve curve); +const uECC_word_t *uECC_curve_G(uECC_Curve curve); +const uECC_word_t *uECC_curve_b(uECC_Curve curve); + +int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve); + +/* Multiplies a point by a scalar. Points are represented by the X coordinate + followed by the Y coordinate in the same array, both coordinates are + curve->num_words long. Note that scalar must be curve->num_n_words long (NOT + curve->num_words). */ +void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, uECC_Curve curve); + +/* Generates a random integer in the range 0 < random < top. + Both random and top have num_words words. */ +int uECC_generate_random_int(uECC_word_t *random, const uECC_word_t *top, + wordcount_t num_words); + +#endif /* uECC_ENABLE_VLI_API */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* _UECC_VLI_H_ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ + +#ifndef _UECC_TYPES_H_ +#define _UECC_TYPES_H_ + +#ifndef uECC_PLATFORM +#if defined(__AVR__) && __AVR__ +#define uECC_PLATFORM uECC_avr +#elif defined(__thumb2__) || \ + defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */ +#define uECC_PLATFORM uECC_arm_thumb2 +#elif defined(__thumb__) +#define uECC_PLATFORM uECC_arm_thumb +#elif defined(__arm__) || defined(_M_ARM) +#define uECC_PLATFORM uECC_arm +#elif defined(__aarch64__) +#define uECC_PLATFORM uECC_arm64 +#elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ + defined(__I86__) +#define uECC_PLATFORM uECC_x86 +#elif defined(__amd64__) || defined(_M_X64) +#define uECC_PLATFORM uECC_x86_64 +#else +#define uECC_PLATFORM uECC_arch_other +#endif +#endif + +#ifndef uECC_ARM_USE_UMAAL +#if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6) +#define uECC_ARM_USE_UMAAL 1 +#elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && \ + (!defined(__ARM_ARCH_7M__) || !__ARM_ARCH_7M__) +#define uECC_ARM_USE_UMAAL 1 +#else +#define uECC_ARM_USE_UMAAL 0 +#endif +#endif + +#ifndef uECC_WORD_SIZE +#if uECC_PLATFORM == uECC_avr +#define uECC_WORD_SIZE 1 +#elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64) +#define uECC_WORD_SIZE 8 +#else +#define uECC_WORD_SIZE 4 +#endif +#endif + +#if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8) +#error "Unsupported value for uECC_WORD_SIZE" +#endif + +#if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1)) +#pragma message("uECC_WORD_SIZE must be 1 for AVR") +#undef uECC_WORD_SIZE +#define uECC_WORD_SIZE 1 +#endif + +#if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ + uECC_PLATFORM == uECC_arm_thumb2) && \ + (uECC_WORD_SIZE != 4)) +#pragma message("uECC_WORD_SIZE must be 4 for ARM") +#undef uECC_WORD_SIZE +#define uECC_WORD_SIZE 4 +#endif + +typedef int8_t wordcount_t; +typedef int16_t bitcount_t; +typedef int8_t cmpresult_t; + +#if (uECC_WORD_SIZE == 1) + +typedef uint8_t uECC_word_t; 
+typedef uint16_t uECC_dword_t; + +#define HIGH_BIT_SET 0x80 +#define uECC_WORD_BITS 8 +#define uECC_WORD_BITS_SHIFT 3 +#define uECC_WORD_BITS_MASK 0x07 + +#elif (uECC_WORD_SIZE == 4) + +typedef uint32_t uECC_word_t; +typedef uint64_t uECC_dword_t; + +#define HIGH_BIT_SET 0x80000000 +#define uECC_WORD_BITS 32 +#define uECC_WORD_BITS_SHIFT 5 +#define uECC_WORD_BITS_MASK 0x01F + +#elif (uECC_WORD_SIZE == 8) + +typedef uint64_t uECC_word_t; + +#define HIGH_BIT_SET 0x8000000000000000U +#define uECC_WORD_BITS 64 +#define uECC_WORD_BITS_SHIFT 6 +#define uECC_WORD_BITS_MASK 0x03F + +#endif /* uECC_WORD_SIZE */ + +#endif /* _UECC_TYPES_H_ */ + + struct mg_connection; typedef void (*mg_event_handler_t)(struct mg_connection *, int ev, void *ev_data, void *fn_data); @@ -1308,80 +2225,6 @@ struct mg_timer *mg_timer_add(struct mg_mgr *mgr, uint64_t milliseconds, -// Macros to record timestamped events that happens with a connection. -// They are saved into a c->prof IO buffer, each event is a name and a 32-bit -// timestamp in milliseconds since connection init time. 
-// -// Test (run in two separate terminals): -// make -C examples/http-server/ CFLAGS_EXTRA=-DMG_ENABLE_PROFILE=1 -// curl localhost:8000 -// Output: -// 1ea1f1e7 2 net.c:150:mg_close_conn 3 profile: -// 1ea1f1e8 2 net.c:150:mg_close_conn 1ea1f1e6 init -// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_OPEN -// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_ACCEPT -// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_READ -// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_HTTP_MSG -// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_WRITE -// 1ea1f1e8 2 net.c:150:mg_close_conn 1 EV_CLOSE -// -// Usage: -// Enable profiling by setting MG_ENABLE_PROFILE=1 -// Invoke MG_PROF_ADD(c, "MY_EVENT_1") in the places you'd like to measure - -#if MG_ENABLE_PROFILE -struct mg_profitem { - const char *name; // Event name - uint32_t timestamp; // Milliseconds since connection creation (MG_EV_OPEN) -}; - -#define MG_PROFILE_ALLOC_GRANULARITY 256 // Can save 32 items wih to realloc - -// Adding a profile item to the c->prof. Must be as fast as possible. -// Reallocation of the c->prof iobuf is not desirable here, that's why we -// pre-allocate c->prof with MG_PROFILE_ALLOC_GRANULARITY. -// This macro just inits and copies 8 bytes, and calls mg_millis(), -// which should be fast enough. -#define MG_PROF_ADD(c, name_) \ - do { \ - struct mg_iobuf *io = &c->prof; \ - uint32_t inittime = ((struct mg_profitem *) io->buf)->timestamp; \ - struct mg_profitem item = {name_, (uint32_t) mg_millis() - inittime}; \ - mg_iobuf_add(io, io->len, &item, sizeof(item)); \ - } while (0) - -// Initialising profile for a new connection. Not time sensitive -#define MG_PROF_INIT(c) \ - do { \ - struct mg_profitem first = {"init", (uint32_t) mg_millis()}; \ - mg_iobuf_init(&(c)->prof, 0, MG_PROFILE_ALLOC_GRANULARITY); \ - mg_iobuf_add(&c->prof, c->prof.len, &first, sizeof(first)); \ - } while (0) - -#define MG_PROF_FREE(c) mg_iobuf_free(&(c)->prof) - -// Dumping the profile. 
Not time sensitive -#define MG_PROF_DUMP(c) \ - do { \ - struct mg_iobuf *io = &c->prof; \ - struct mg_profitem *p = (struct mg_profitem *) io->buf; \ - struct mg_profitem *e = &p[io->len / sizeof(*p)]; \ - MG_INFO(("%lu profile:", c->id)); \ - while (p < e) { \ - MG_INFO(("%5lx %s", (unsigned long) p->timestamp, p->name)); \ - p++; \ - } \ - } while (0) - -#else -#define MG_PROF_INIT(c) -#define MG_PROF_FREE(c) -#define MG_PROF_ADD(c, name) -#define MG_PROF_DUMP(c) -#endif - - - @@ -1803,7 +2646,7 @@ enum { MG_OTA_UNAVAILABLE = 0, // No OTA information is present MG_OTA_FIRST_BOOT = 1, // Device booting the first time after the OTA MG_OTA_UNCOMMITTED = 2, // Ditto, but marking us for the rollback - MG_OTA_COMMITTED = 3, // The firmware is good + MG_OTA_COMMITTED = 3 // The firmware is good }; enum { MG_FIRMWARE_CURRENT = 0, MG_FIRMWARE_PREVIOUS = 1 }; @@ -1922,6 +2765,80 @@ struct mg_tcpip_spi { #endif + +// Macros to record timestamped events that happens with a connection. +// They are saved into a c->prof IO buffer, each event is a name and a 32-bit +// timestamp in milliseconds since connection init time. 
+// +// Test (run in two separate terminals): +// make -C examples/http-server/ CFLAGS_EXTRA=-DMG_ENABLE_PROFILE=1 +// curl localhost:8000 +// Output: +// 1ea1f1e7 2 net.c:150:mg_close_conn 3 profile: +// 1ea1f1e8 2 net.c:150:mg_close_conn 1ea1f1e6 init +// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_OPEN +// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_ACCEPT +// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_READ +// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_HTTP_MSG +// 1ea1f1e8 2 net.c:150:mg_close_conn 0 EV_WRITE +// 1ea1f1e8 2 net.c:150:mg_close_conn 1 EV_CLOSE +// +// Usage: +// Enable profiling by setting MG_ENABLE_PROFILE=1 +// Invoke MG_PROF_ADD(c, "MY_EVENT_1") in the places you'd like to measure + +#if MG_ENABLE_PROFILE +struct mg_profitem { + const char *name; // Event name + uint32_t timestamp; // Milliseconds since connection creation (MG_EV_OPEN) +}; + +#define MG_PROFILE_ALLOC_GRANULARITY 256 // Can save 32 items wih to realloc + +// Adding a profile item to the c->prof. Must be as fast as possible. +// Reallocation of the c->prof iobuf is not desirable here, that's why we +// pre-allocate c->prof with MG_PROFILE_ALLOC_GRANULARITY. +// This macro just inits and copies 8 bytes, and calls mg_millis(), +// which should be fast enough. +#define MG_PROF_ADD(c, name_) \ + do { \ + struct mg_iobuf *io = &c->prof; \ + uint32_t inittime = ((struct mg_profitem *) io->buf)->timestamp; \ + struct mg_profitem item = {name_, (uint32_t) mg_millis() - inittime}; \ + mg_iobuf_add(io, io->len, &item, sizeof(item)); \ + } while (0) + +// Initialising profile for a new connection. Not time sensitive +#define MG_PROF_INIT(c) \ + do { \ + struct mg_profitem first = {"init", (uint32_t) mg_millis()}; \ + mg_iobuf_init(&(c)->prof, 0, MG_PROFILE_ALLOC_GRANULARITY); \ + mg_iobuf_add(&c->prof, c->prof.len, &first, sizeof(first)); \ + } while (0) + +#define MG_PROF_FREE(c) mg_iobuf_free(&(c)->prof) + +// Dumping the profile. 
Not time sensitive +#define MG_PROF_DUMP(c) \ + do { \ + struct mg_iobuf *io = &c->prof; \ + struct mg_profitem *p = (struct mg_profitem *) io->buf; \ + struct mg_profitem *e = &p[io->len / sizeof(*p)]; \ + MG_INFO(("%lu profile:", c->id)); \ + while (p < e) { \ + MG_INFO(("%5lx %s", (unsigned long) p->timestamp, p->name)); \ + p++; \ + } \ + } while (0) + +#else +#define MG_PROF_INIT(c) +#define MG_PROF_FREE(c) +#define MG_PROF_ADD(c, name) +#define MG_PROF_DUMP(c) +#endif + + #if MG_ENABLE_TCPIP && defined(MG_ENABLE_DRIVER_CMSIS) && MG_ENABLE_DRIVER_CMSIS #include "Driver_ETH_MAC.h" // keep this include diff --git a/src/base64.c b/src/base64.c index ffd72cf5..e3f78fa3 100644 --- a/src/base64.c +++ b/src/base64.c @@ -69,14 +69,15 @@ size_t mg_base64_encode(const unsigned char *p, size_t n, char *to, size_t dl) { size_t mg_base64_decode(const char *src, size_t n, char *dst, size_t dl) { const char *end = src == NULL ? NULL : src + n; // Cannot add to NULL size_t len = 0; - if (dl > 0) dst[0] = '\0'; - if (dl < n / 4 * 3 + 1) return 0; + if (dl < n / 4 * 3 + 1) goto fail; while (src != NULL && src + 3 < end) { int a = mg_base64_decode_single(src[0]), b = mg_base64_decode_single(src[1]), c = mg_base64_decode_single(src[2]), d = mg_base64_decode_single(src[3]); - if (a == 64 || a < 0 || b == 64 || b < 0 || c < 0 || d < 0) return 0; + if (a == 64 || a < 0 || b == 64 || b < 0 || c < 0 || d < 0) { + goto fail; + } dst[len++] = (char) ((a << 2) | (b >> 4)); if (src[2] != '=') { dst[len++] = (char) ((b << 4) | (c >> 2)); @@ -86,4 +87,7 @@ size_t mg_base64_decode(const char *src, size_t n, char *dst, size_t dl) { } dst[len] = '\0'; return len; +fail: + if (dl > 0) dst[0] = '\0'; + return 0; } diff --git a/src/http.c b/src/http.c index a20bf3f0..f0b8453d 100644 --- a/src/http.c +++ b/src/http.c @@ -201,7 +201,7 @@ static bool vcb(uint8_t c) { // Get character length (valid utf-8). 
Used to parse method, URI, headers static size_t clen(const char *s, const char *end) { const unsigned char *u = (unsigned char *) s, c = *u; - long n = end - s; + long n = (long) (end - s); if (c > ' ' && c < '~') return 1; // Usual ascii printed char if ((c & 0xe0) == 0xc0 && n > 1 && vcb(u[1])) return 2; // 2-byte UTF8 if ((c & 0xf0) == 0xe0 && n > 2 && vcb(u[1]) && vcb(u[2])) return 3; @@ -983,7 +983,8 @@ static void http_cb(struct mg_connection *c, int ev, void *evd, void *fnd) { struct mg_str *te; // Transfer - encoding header bool is_chunked = false; if (n < 0) { - mg_error(c, "HTTP parse"); + mg_error(c, "HTTP parse, %lu bytes", c->recv.len); + mg_hexdump(c->recv.buf, c->recv.len > 16 ? 16 : c->recv.len); return; } if (n == 0) break; // Request is not buffered yet diff --git a/src/ota.h b/src/ota.h index 7eae2a49..d9b25990 100644 --- a/src/ota.h +++ b/src/ota.h @@ -28,7 +28,7 @@ enum { MG_OTA_UNAVAILABLE = 0, // No OTA information is present MG_OTA_FIRST_BOOT = 1, // Device booting the first time after the OTA MG_OTA_UNCOMMITTED = 2, // Ditto, but marking us for the rollback - MG_OTA_COMMITTED = 3, // The firmware is good + MG_OTA_COMMITTED = 3 // The firmware is good }; enum { MG_FIRMWARE_CURRENT = 0, MG_FIRMWARE_PREVIOUS = 1 }; diff --git a/src/sha256.c b/src/sha256.c new file mode 100644 index 00000000..c1dccf52 --- /dev/null +++ b/src/sha256.c @@ -0,0 +1,160 @@ +#include "sha256.h" + +#define ror(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +#define ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) +#define ep0(x) (ror(x, 2) ^ ror(x, 13) ^ ror(x, 22)) +#define ep1(x) (ror(x, 6) ^ ror(x, 11) ^ ror(x, 25)) +#define sig0(x) (ror(x, 7) ^ ror(x, 18) ^ ((x) >> 3)) +#define sig1(x) (ror(x, 17) ^ ror(x, 19) ^ ((x) >> 10)) + +static const uint32_t mg_sha256_k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, + 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 
0x550c7dc3,
+    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+    0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+    0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+    0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+    0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2};
+
+// Reset the context: no buffered bytes, zero processed bits, and the eight
+// SHA-256 initial state words
+void mg_sha256_init(mg_sha256_ctx *ctx) {
+  ctx->len = 0;
+  ctx->bits = 0;
+  ctx->state[0] = 0x6a09e667;
+  ctx->state[1] = 0xbb67ae85;
+  ctx->state[2] = 0x3c6ef372;
+  ctx->state[3] = 0xa54ff53a;
+  ctx->state[4] = 0x510e527f;
+  ctx->state[5] = 0x9b05688c;
+  ctx->state[6] = 0x1f83d9ab;
+  ctx->state[7] = 0x5be0cd19;
+}
+
+// Compress the 64-byte block held in ctx->buffer and add the result to
+// ctx->state
+static void mg_sha256_chunk(mg_sha256_ctx *ctx) {
+  int i, j;
+  uint32_t a, b, c, d, e, f, g, h;
+  uint32_t m[64];
+  // Load 16 big-endian words. Every byte is widened to uint32_t before it is
+  // shifted: an unsigned char promotes to int, and shifting a value >= 0x80
+  // left by 24 bits overflows a signed int, which is undefined behaviour
+  for (i = 0, j = 0; i < 16; ++i, j += 4)
+    m[i] = ((uint32_t) ctx->buffer[j] << 24) |
+           ((uint32_t) ctx->buffer[j + 1] << 16) |
+           ((uint32_t) ctx->buffer[j + 2] << 8) |
+           ((uint32_t) ctx->buffer[j + 3]);
+  // Expand the 16 loaded words into the 64-word message schedule
+  for (; i < 64; ++i)
+    m[i] = sig1(m[i - 2]) + m[i - 7] + sig0(m[i - 15]) + m[i - 16];
+
+  a = ctx->state[0];
+  b = ctx->state[1];
+  c = ctx->state[2];
+  d = ctx->state[3];
+  e = ctx->state[4];
+  f = ctx->state[5];
+  g = ctx->state[6];
+  h = ctx->state[7];
+
+  for (i = 0; i < 64; ++i) {
+    uint32_t t1 = h + ep1(e) + ch(e, f, g) + mg_sha256_k[i] + m[i];
+    uint32_t t2 = ep0(a) + maj(a, b, c);
+    h = g;
+    g = f;
+    f = e;
+    e = d + t1;
+    d = c;
+    c = b;
+    b = a;
+    a = t1 + t2;
+  }
+
+  ctx->state[0] += a;
+  ctx->state[1] += b;
+  ctx->state[2] += c;
+  ctx->state[3] += d;
+  ctx->state[4] += e;
+  ctx->state[5] += f;
+  ctx->state[6] += g;
+  ctx->state[7] += h;
+}
+
+// Append len bytes of data to the hash. Bytes are staged in ctx->buffer; each
+// time 64 bytes have accumulated the block is compressed and ctx->bits grows
+// by 512
+void mg_sha256_update(mg_sha256_ctx *ctx, const unsigned char *data,
+                      size_t len) {
+  size_t i;
+  for (i = 0; i < len; i++) {
+    ctx->buffer[ctx->len] = data[i];
+    if ((++ctx->len) == 64) {
+      mg_sha256_chunk(ctx);
+      ctx->bits += 512;
+      ctx->len = 0;
+    }
+  }
+}
+
+// Pad the buffered tail, append the 64-bit big-endian message length in bits
+// and write the 32-byte digest. The context is modified in the process
+// TODO: make final reusable (remove side effects)
+void mg_sha256_final(unsigned char digest[32], mg_sha256_ctx *ctx) {
+  uint32_t i = ctx->len;
+  if (i < 56) {
+    // The 0x80 marker and the length field fit into the current block
+    ctx->buffer[i++] = 0x80;
+    while (i < 56) {
+      ctx->buffer[i++] = 0x00;
+    }
+  } else {
+    // No room for the length field: finish this block and start an empty one
+    ctx->buffer[i++] = 0x80;
+    while (i < 64) {
+      ctx->buffer[i++] = 0x00;
+    }
+    mg_sha256_chunk(ctx);
+    memset(ctx->buffer, 0, 56);
+  }
+
+  // ctx->bits so far counts whole blocks only; add the buffered tail
+  ctx->bits += ctx->len * 8;
+  ctx->buffer[63] = (uint8_t) ((ctx->bits) & 0xff);
+  ctx->buffer[62] = (uint8_t) ((ctx->bits >> 8) & 0xff);
+  ctx->buffer[61] = (uint8_t) ((ctx->bits >> 16) & 0xff);
+  ctx->buffer[60] = (uint8_t) ((ctx->bits >> 24) & 0xff);
+  ctx->buffer[59] = (uint8_t) ((ctx->bits >> 32) & 0xff);
+  ctx->buffer[58] = (uint8_t) ((ctx->bits >> 40) & 0xff);
+  ctx->buffer[57] = (uint8_t) ((ctx->bits >> 48) & 0xff);
+  ctx->buffer[56] = (uint8_t) ((ctx->bits >> 56) & 0xff);
+  mg_sha256_chunk(ctx);
+
+  // Serialise the eight state words big-endian, one byte position per pass
+  for (i = 0; i < 4; ++i) {
+    digest[i] = (ctx->state[0] >> (24 - i * 8)) & 0xff;
+    digest[i + 4] = (ctx->state[1] >> (24 - i * 8)) & 0xff;
+    digest[i + 8] = (ctx->state[2] >> (24 - i * 8)) & 0xff;
+    digest[i + 12] = (ctx->state[3] >> (24 - i * 8)) & 0xff;
+    digest[i + 16] = (ctx->state[4] >> (24 - i * 8)) & 0xff;
+    digest[i + 20] = (ctx->state[5] >> (24 - i * 8)) & 0xff;
+    digest[i + 24] = (ctx->state[6] >> (24 - i * 8)) & 0xff;
+    digest[i + 28] = (ctx->state[7] >> (24 - i * 8)) & 0xff;
+  }
+}
+
+// HMAC-SHA256 (RFC 2104) of data[0..datasz) under key[0..keysz), written to
+// dst. dst is also used to hold the inner hash before the outer pass
+void mg_hmac_sha256(uint8_t dst[32], uint8_t *key, size_t keysz, uint8_t *data,
+                    size_t datasz) {
+  mg_sha256_ctx ctx;
+  uint8_t k[64] = {0};
+  uint8_t o_pad[64], i_pad[64];
+  unsigned int i;
+  memset(i_pad, 0x36, sizeof(i_pad));
+  memset(o_pad, 0x5c, sizeof(o_pad));
+  if (keysz <= sizeof(k)) {
+    // A key that fits the 64-byte block, including one of exactly 64 bytes,
+    // is used as is and zero-padded. The copy is skipped for an empty key so
+    // that a NULL key pointer is never handed to memmove()
+    if (keysz > 0) memmove(k, key, keysz);
+  } else {
+    // Only a key longer than the block is replaced by its SHA-256 hash
+    mg_sha256_init(&ctx);
+    mg_sha256_update(&ctx, key, keysz);
+    mg_sha256_final(k, &ctx);
+  }
+  for (i = 0; i < sizeof(k); i++) {
+    i_pad[i] ^= k[i];
+    o_pad[i] ^= k[i];
+  }
+  // Inner hash: H((K ^ ipad) || data)
+  mg_sha256_init(&ctx);
+  mg_sha256_update(&ctx, i_pad, sizeof(i_pad));
+  mg_sha256_update(&ctx, data, datasz);
+  mg_sha256_final(dst, &ctx);
+  // Outer hash: H((K ^ opad) || inner)
+  mg_sha256_init(&ctx);
+  mg_sha256_update(&ctx, o_pad, sizeof(o_pad));
+  mg_sha256_update(&ctx, dst, 32);
+  mg_sha256_final(dst, &ctx);
+}
+
diff --git a/src/sha256.h b/src/sha256.h
new file mode 100644
index 00000000..b8473e9b
--- /dev/null
+++ b/src/sha256.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "arch.h"
+
+typedef struct {
+  uint32_t state[8];
+  uint64_t bits;
+  uint32_t len;
+  unsigned char buffer[64];
+} mg_sha256_ctx;
+
+void mg_sha256_init(mg_sha256_ctx *);
+void mg_sha256_update(mg_sha256_ctx *, const unsigned char *data, size_t len);
+void mg_sha256_final(unsigned char digest[32], mg_sha256_ctx *);
+void mg_hmac_sha256(uint8_t dst[32], uint8_t *key, size_t keysz, uint8_t *data,
+                    size_t datasz);
diff --git a/src/tls_aes128.c b/src/tls_aes128.c
new file mode 100644
index 00000000..b4693081
--- /dev/null
+++ b/src/tls_aes128.c
@@ -0,0 +1,1002 @@
+/******************************************************************************
+ *
+ * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL
+ *
+ * This is a simple and straightforward implementation of the AES Rijndael
+ * 128-bit block cipher designed by Vincent Rijmen and Joan Daemen. The focus
+ * of this work was correctness & accuracy. It is written in 'C' without any
+ * particular focus upon optimization or speed. It should be endian (memory
+ * byte order) neutral since the few places that care are handled explicitly.
+ *
+ * This implementation of Rijndael was created by Steven M. Gibson of GRC.com.
+ *
+ * It is intended for general purpose use, but was written in support of GRC's
+ * reference implementation of the SQRL (Secure Quick Reliable Login) client.
+ *
+ * See: http://csrc.nist.gov/archive/aes/rijndael/wsdindex.html
+ *
+ * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE
+ * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK.
+ *
+ *******************************************************************************/
+
+#include "tls_aes128.h"
+#include "tls.h"
+
+#if MG_TLS == MG_TLS_BUILTIN
+static int aes_tables_inited = 0;  // run-once flag for performing key
+                                   // expansion table generation (see below);
+                                   // set to 1 by aes_init_keygen_tables(),
+                                   // aes_setkey() returns -1 while it is 0
+/*
+ * The following static local tables must be filled-in before the first use of
+ * the GCM or AES ciphers. They are used for the AES key expansion/scheduling
+ * and once built are read-only and thread safe. The "gcm_initialize" function
+ * must be called once during system initialization to populate these arrays
+ * for subsequent use by the AES key scheduler. If they have not been built
+ * before attempted use, an error will be returned to the caller.
+ *
+ * NOTE: GCM Encryption/Decryption does NOT REQUIRE AES decryption. Since
+ * GCM uses AES in counter-mode, where the AES cipher output is XORed with
+ * the GCM input, we ONLY NEED AES encryption. Thus, to save space AES
+ * decryption is typically disabled by setting AES_DECRYPTION to 0 in aes.h.
+ */
+// We always need our forward tables
+static uchar FSb[256];     // Forward substitution box (FSb)
+static uint32_t FT0[256];  // Forward key schedule assembly tables
+static uint32_t FT1[256];
+static uint32_t FT2[256];
+static uint32_t FT3[256];
+
+#if AES_DECRYPTION         // We ONLY need reverse for decryption
+static uchar RSb[256];     // Reverse substitution box (RSb)
+static uint32_t RT0[256];  // Reverse key schedule assembly tables
+static uint32_t RT1[256];
+static uint32_t RT2[256];
+static uint32_t RT3[256];
+#endif /* AES_DECRYPTION */
+
+static uint32_t RCON[10];  // AES round constants
+
+/*
+ * Platform Endianness Neutralizing Load and Store Macro definitions
+ * AES wants platform-neutral Little Endian (LE) byte ordering
+ */
+#define GET_UINT32_LE(n, b, i)                                               \
+  {                                                                          \
+    (n) = ((uint32_t) (b)[(i)]) | ((uint32_t) (b)[(i) + 1] << 8) |           \
+          ((uint32_t) (b)[(i) + 2] << 16) | ((uint32_t) (b)[(i) + 3] << 24); \
+  }
+
+#define PUT_UINT32_LE(n, b, i)          \
+  {                                     \
+    (b)[(i)] = (uchar) ((n));           \
+    (b)[(i) + 1] = (uchar) ((n) >> 8);  \
+    (b)[(i) + 2] = (uchar) ((n) >> 16); \
+    (b)[(i) + 3] = (uchar) ((n) >> 24); \
+  }
+
+/*
+ * AES forward and reverse encryption round processing macros
+ * Both expect a 'uint32_t *RK' round-key cursor in scope and advance it by
+ * four words per invocation.
+ */
+#define AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3)         \
+  {                                                        \
+    X0 = *RK++ ^ FT0[(Y0) &0xFF] ^ FT1[(Y1 >> 8) & 0xFF] ^ \
+         FT2[(Y2 >> 16) & 0xFF] ^ FT3[(Y3 >> 24) & 0xFF];  \
+                                                           \
+    X1 = *RK++ ^ FT0[(Y1) &0xFF] ^ FT1[(Y2 >> 8) & 0xFF] ^ \
+         FT2[(Y3 >> 16) & 0xFF] ^ FT3[(Y0 >> 24) & 0xFF];  \
+                                                           \
+    X2 = *RK++ ^ FT0[(Y2) &0xFF] ^ FT1[(Y3 >> 8) & 0xFF] ^ \
+         FT2[(Y0 >> 16) & 0xFF] ^ FT3[(Y1 >> 24) & 0xFF];  \
+                                                           \
+    X3 = *RK++ ^ FT0[(Y3) &0xFF] ^ FT1[(Y0 >> 8) & 0xFF] ^ \
+         FT2[(Y1 >> 16) & 0xFF] ^ FT3[(Y2 >> 24) & 0xFF];  \
+  }
+
+#define AES_RROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3)         \
+  {                                                        \
+    X0 = *RK++ ^ RT0[(Y0) &0xFF] ^ RT1[(Y3 >> 8) & 0xFF] ^ \
+         RT2[(Y2 >> 16) & 0xFF] ^ RT3[(Y1 >> 24) & 0xFF];  \
+                                                           \
+    X1 = *RK++ ^ RT0[(Y1) &0xFF] ^ RT1[(Y0 >> 8) & 0xFF] ^ \
+         RT2[(Y3 >> 16) & 0xFF] ^ RT3[(Y2 >> 24) & 0xFF];  \
+                                                           \
+    X2 = *RK++ ^ RT0[(Y2) &0xFF] ^ RT1[(Y1 >> 8) & 0xFF] ^ \
+         RT2[(Y0 >> 16) & 0xFF] ^ RT3[(Y3 >> 24) & 0xFF];  \
+                                                           \
+    X3 = *RK++ ^ RT0[(Y3) &0xFF] ^ RT1[(Y2 >> 8) & 0xFF] ^ \
+         RT2[(Y1 >> 16) & 0xFF] ^ RT3[(Y0 >> 24) & 0xFF];  \
+  }
+
+/*
+ * These macros improve the readability of the key
+ * generation initialization code by collapsing
+ * repetitive common operations into logical pieces.
+ *
+ * Every argument and every expression-style expansion is parenthesized so
+ * the macros stay correct when handed a compound expression or when used
+ * inside a larger expression. MUL() reads arrays named 'pow' and 'log' that
+ * must be in scope at the point of use. MIX() and CPY128 expand to brace
+ * blocks, not expressions; CPY128 also expects 'RK' and 'SK' pointers in
+ * scope.
+ */
+#define ROTL8(x) ((((x) << 8) & 0xFFFFFFFF) | ((x) >> 24))
+#define XTIME(x) (((x) << 1) ^ (((x) & 0x80) ? 0x1B : 0x00))
+#define MUL(x, y) (((x) && (y)) ? pow[(log[(x)] + log[(y)]) % 255] : 0)
+#define MIX(x, y)                           \
+  {                                         \
+    (y) = (((y) << 1) | ((y) >> 7)) & 0xFF; \
+    (x) ^= (y);                             \
+  }
+#define CPY128     \
+  {                \
+    *RK++ = *SK++; \
+    *RK++ = *SK++; \
+    *RK++ = *SK++; \
+    *RK++ = *SK++; \
+  }
+
+/******************************************************************************
+ *
+ * AES_INIT_KEYGEN_TABLES
+ *
+ * Fills the AES key expansion tables allocated above with their static
+ * data. This is not "per key" data, but static system-wide read-only
+ * table data. THIS FUNCTION IS NOT THREAD SAFE. It must be called once
+ * at system initialization to setup the tables for all subsequent use.
+ *
+ ******************************************************************************/
+// Fills RCON, FSb and FT0..FT3 (plus RSb and RT0..RT3 when AES_DECRYPTION is
+// enabled) and then sets aes_tables_inited. Returns immediately when that
+// flag is already set. The locals 'pow' and 'log' must keep these names
+// because the MUL() macro refers to them by name.
+void aes_init_keygen_tables(void) {
+  int i, x, y, z;  // general purpose iteration and computation locals
+  int pow[256];
+  int log[256];
+
+  if (aes_tables_inited) return;
+
+  // fill the 'pow' and 'log' tables over GF(2^8)
+  // XTIME() doubles a field element, so x ^ XTIME(x) multiplies x by 3:
+  // pow[i] receives 3^i and log[] records the exponent for each value
+  for (i = 0, x = 1; i < 256; i++) {
+    pow[i] = x;
+    log[x] = i;
+    x = (x ^ XTIME(x)) & 0xFF;
+  }
+  // compute the round constants
+  // (successive doublings of 1 in the field)
+  for (i = 0, x = 1; i < 10; i++) {
+    RCON[i] = (uint32_t) x;
+    x = XTIME(x) & 0xFF;
+  }
+  // fill the forward and reverse substitution boxes
+  // entry 0 is set by hand because the loop below starts at i = 1
+  FSb[0x00] = 0x63;
+#if AES_DECRYPTION  // whether AES decryption is supported
+  RSb[0x63] = 0x00;
+#endif /* AES_DECRYPTION */
+
+  for (i = 1; i < 256; i++) {
+    // start from pow[255 - log[i]], then fold in four 1-bit rotations of it
+    // via MIX() and finally XOR with 0x63
+    x = y = pow[255 - log[i]];
+    MIX(x, y);
+    MIX(x, y);
+    MIX(x, y);
+    MIX(x, y);
+    // x keeps the final S-box value so that it can index RSb below
+    FSb[i] = (uchar) (x ^= 0x63);
+#if AES_DECRYPTION  // whether AES decryption is supported
+    RSb[x] = (uchar) i;
+#endif /* AES_DECRYPTION */
+  }
+  // generate the forward and reverse key expansion tables
+  for (i = 0; i < 256; i++) {
+    x = FSb[i];
+    y = XTIME(x) & 0xFF;
+    z = (y ^ x) & 0xFF;
+
+    // bytes from least to most significant: y, x, x, z
+    FT0[i] = ((uint32_t) y) ^ ((uint32_t) x << 8) ^ ((uint32_t) x << 16) ^
+             ((uint32_t) z << 24);
+
+    // each further table is the previous one rotated left by 8 bits
+    FT1[i] = ROTL8(FT0[i]);
+    FT2[i] = ROTL8(FT1[i]);
+    FT3[i] = ROTL8(FT2[i]);
+
+#if AES_DECRYPTION  // whether AES decryption is supported
+    x = RSb[i];
+
+    RT0[i] = ((uint32_t) MUL(0x0E, x)) ^ ((uint32_t) MUL(0x09, x) << 8) ^
+             ((uint32_t) MUL(0x0D, x) << 16) ^ ((uint32_t) MUL(0x0B, x) << 24);
+
+    RT1[i] = ROTL8(RT0[i]);
+    RT2[i] = ROTL8(RT1[i]);
+    RT3[i] = ROTL8(RT2[i]);
+#endif /* AES_DECRYPTION */
+  }
+  aes_tables_inited = 1;  // flag that the tables have been generated
+}  // to permit subsequent use of the AES cipher
+
+/******************************************************************************
+ *
+ * AES_SET_ENCRYPTION_KEY
+ *
+ * This is called by 'aes_setkey' when we're establishing a key for
+ * subsequent encryption.
We give it a pointer to the encryption
+ * context, a pointer to the key, and the key's length in bytes.
+ * Valid lengths are: 16, 24 or 32 bytes (128, 192, 256 bits).
+ *
+ * The expansion variant is selected by ctx->rounds (10, 12 or 14), which
+ * the caller must have set to match keysize; ctx->rk must already point
+ * at the round key buffer. Reads the FSb and RCON tables.
+ *
+ * Returns 0 on success, or -1 when ctx->rounds is none of 10, 12, 14.
+ *
+ ******************************************************************************/
+static int aes_set_encryption_key(aes_context *ctx, const uchar *key, uint keysize) {
+  uint i;                  // general purpose iteration local
+  uint32_t *RK = ctx->rk;  // initialize our RoundKey buffer pointer
+
+  // the raw key, read as little-endian 32-bit words, forms the first
+  // keysize / 4 round key words
+  for (i = 0; i < (keysize >> 2); i++) {
+    GET_UINT32_LE(RK[i], key, i << 2);
+  }
+
+  switch (ctx->rounds) {
+    case 10:
+      // each pass derives 4 new words from the previous 4
+      for (i = 0; i < 10; i++, RK += 4) {
+        RK[4] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[3] >> 8) & 0xFF]) ^
+                ((uint32_t) FSb[(RK[3] >> 16) & 0xFF] << 8) ^
+                ((uint32_t) FSb[(RK[3] >> 24) & 0xFF] << 16) ^
+                ((uint32_t) FSb[(RK[3]) & 0xFF] << 24);
+
+        RK[5] = RK[1] ^ RK[4];
+        RK[6] = RK[2] ^ RK[5];
+        RK[7] = RK[3] ^ RK[6];
+      }
+      break;
+
+    case 12:
+      // each pass derives 6 new words from the previous 6
+      for (i = 0; i < 8; i++, RK += 6) {
+        RK[6] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[5] >> 8) & 0xFF]) ^
+                ((uint32_t) FSb[(RK[5] >> 16) & 0xFF] << 8) ^
+                ((uint32_t) FSb[(RK[5] >> 24) & 0xFF] << 16) ^
+                ((uint32_t) FSb[(RK[5]) & 0xFF] << 24);
+
+        RK[7] = RK[1] ^ RK[6];
+        RK[8] = RK[2] ^ RK[7];
+        RK[9] = RK[3] ^ RK[8];
+        RK[10] = RK[4] ^ RK[9];
+        RK[11] = RK[5] ^ RK[10];
+      }
+      break;
+
+    case 14:
+      // each pass derives 8 new words; the fifth one (RK[12]) gets an extra
+      // substitution step without rotation or round constant
+      for (i = 0; i < 7; i++, RK += 8) {
+        RK[8] = RK[0] ^ RCON[i] ^ ((uint32_t) FSb[(RK[7] >> 8) & 0xFF]) ^
+                ((uint32_t) FSb[(RK[7] >> 16) & 0xFF] << 8) ^
+                ((uint32_t) FSb[(RK[7] >> 24) & 0xFF] << 16) ^
+                ((uint32_t) FSb[(RK[7]) & 0xFF] << 24);
+
+        RK[9] = RK[1] ^ RK[8];
+        RK[10] = RK[2] ^ RK[9];
+        RK[11] = RK[3] ^ RK[10];
+
+        RK[12] = RK[4] ^ ((uint32_t) FSb[(RK[11]) & 0xFF]) ^
+                 ((uint32_t) FSb[(RK[11] >> 8) & 0xFF] << 8) ^
+                 ((uint32_t) FSb[(RK[11] >> 16) & 0xFF] << 16) ^
+                 ((uint32_t) FSb[(RK[11] >> 24) & 0xFF] << 24);
+
+        RK[13] = RK[5] ^ RK[12];
+        RK[14] = RK[6] ^ RK[13];
+        RK[15] = RK[7] ^ RK[14];
+      }
+      break;
+
+    default:
+      return -1;
+  }
+  return (0);
+}
+
+#if AES_DECRYPTION  // whether AES decryption is supported
+
+/******************************************************************************
+ *
+ * AES_SET_DECRYPTION_KEY
+ *
+ * This is called by 'aes_setkey' when we're establishing a
+ * key for subsequent decryption. We give it a pointer to
+ * the encryption context, a pointer to the key, and the key's
+ * length in bytes. Valid lengths are: 16, 24 or 32 bytes
+ * (128, 192, 256 bits).
+ *
+ * The encryption schedule is first expanded into a local context and
+ * then written to ctx->rk in reverse round order, with the inner round
+ * keys passed through the RT0..RT3 tables.
+ *
+ * Returns 0 on success, or the error from aes_set_encryption_key.
+ *
+ ******************************************************************************/
+static int aes_set_decryption_key(aes_context *ctx, const uchar *key, uint keysize) {
+  int i, j;
+  aes_context cty;         // a calling aes context for set_encryption_key
+  uint32_t *RK = ctx->rk;  // initialize our RoundKey buffer pointer
+  uint32_t *SK;
+  int ret;
+
+  cty.rounds = ctx->rounds;  // initialize our local aes context
+  cty.rk = cty.buf;          // round count and key buf pointer
+
+  if ((ret = aes_set_encryption_key(&cty, key, keysize)) != 0) return (ret);
+
+  // start at the last round key of the encryption schedule
+  SK = cty.rk + cty.rounds * 4;
+
+  CPY128  // copy a 128-bit block from *SK to *RK
+
+  // CPY128 and the inner loop each advance SK by 4 words, so stepping back
+  // by 8 lands on the round key preceding the one just consumed
+  for (i = ctx->rounds - 1, SK -= 8; i > 0; i--, SK -= 8) {
+    for (j = 0; j < 4; j++, SK++) {
+      *RK++ = RT0[FSb[(*SK) & 0xFF]] ^ RT1[FSb[(*SK >> 8) & 0xFF]] ^
+              RT2[FSb[(*SK >> 16) & 0xFF]] ^ RT3[FSb[(*SK >> 24) & 0xFF]];
+    }
+  }
+  CPY128  // copy a 128-bit block from *SK to *RK
+  // NOTE(review): a plain memset of a local that is not read afterwards may
+  // be removed by an optimizing compiler - confirm the scrub survives
+  memset(&cty, 0, sizeof(aes_context));  // clear local aes context
+  return (0);
+}
+
+#endif /* AES_DECRYPTION */
+
+/******************************************************************************
+ *
+ * AES_SETKEY
+ *
+ * Invoked to establish the key schedule for subsequent encryption/decryption
+ *
+ * Returns 0 on success, or -1 when the key expansion tables have not been
+ * generated yet or keysize is not 16, 24 or 32. When AES_DECRYPTION is 0
+ * the DECRYPT branch is compiled out and the encryption schedule is built
+ * whatever the value of mode.
+ *
+ ******************************************************************************/
+int aes_setkey(aes_context *ctx,  // AES context provided by our caller
+               int mode,          // ENCRYPT or DECRYPT flag
+               const uchar *key,  // pointer to the key
+               uint keysize)      // key length in bytes
+{
+  // since table initialization is not thread safe, we could either add
+  // system-specific mutexes and init the AES key generation tables on
+  // demand, or ask the developer to simply call "gcm_initialize" once during
+  // application startup before threading begins. That's what we choose.
+  if (!aes_tables_inited) return (-1);  // fail the call when not inited.
+
+  ctx->mode = mode;    // capture the key type we're creating
+  ctx->rk = ctx->buf;  // initialize our round key pointer
+
+  switch (keysize)  // set the rounds count based upon the keysize
+  {
+    case 16:
+      ctx->rounds = 10;
+      break;  // 16-byte, 128-bit key
+    case 24:
+      ctx->rounds = 12;
+      break;  // 24-byte, 192-bit key
+    case 32:
+      ctx->rounds = 14;
+      break;  // 32-byte, 256-bit key
+    default:
+      return (-1);
+  }
+
+#if AES_DECRYPTION
+  if (mode == DECRYPT)  // expand our key for encryption or decryption
+    return (aes_set_decryption_key(ctx, key, keysize));
+  else /* ENCRYPT */
+#endif /* AES_DECRYPTION */
+    return (aes_set_encryption_key(ctx, key, keysize));
+}
+
+/******************************************************************************
+ *
+ * AES_CIPHER
+ *
+ * Perform AES encryption and decryption.
+ * The AES context will have been setup with the encryption mode
+ * and all keying information appropriate for the task.
+ *
+ * Processes exactly one 16-byte block from 'input' into 'output', walking the
+ * round keys at ctx->rk in order. The direction is taken from ctx->mode.
+ * Always returns 0.
+ *
+ ******************************************************************************/
+int aes_cipher(aes_context *ctx, const uchar input[16], uchar output[16]) {
+  int i;
+  uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3;  // general purpose locals
+
+  RK = ctx->rk;
+
+  // load the block as four 32-bit words and XOR in the first round key
+  GET_UINT32_LE(X0, input, 0);
+  X0 ^= *RK++;  // load our 128-bit
+  GET_UINT32_LE(X1, input, 4);
+  X1 ^= *RK++;  // input buffer in a storage
+  GET_UINT32_LE(X2, input, 8);
+  X2 ^= *RK++;  // memory endian-neutral way
+  GET_UINT32_LE(X3, input, 12);
+  X3 ^= *RK++;
+
+#if AES_DECRYPTION  // whether AES decryption is supported
+
+  if (ctx->mode == DECRYPT) {
+    // two rounds per iteration, alternating between the X and Y word sets
+    for (i = (ctx->rounds >> 1) - 1; i > 0; i--) {
+      AES_RROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+      AES_RROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
+    }
+
+    AES_RROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+
+    // last round: byte-wise RSb lookups combined with the final round key
+    X0 = *RK++ ^ ((uint32_t) RSb[(Y0) &0xFF]) ^
+         ((uint32_t) RSb[(Y3 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) RSb[(Y2 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) RSb[(Y1 >> 24) & 0xFF] << 24);
+
+    X1 = *RK++ ^ ((uint32_t) RSb[(Y1) &0xFF]) ^
+         ((uint32_t) RSb[(Y0 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) RSb[(Y3 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) RSb[(Y2 >> 24) & 0xFF] << 24);
+
+    X2 = *RK++ ^ ((uint32_t) RSb[(Y2) &0xFF]) ^
+         ((uint32_t) RSb[(Y1 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) RSb[(Y0 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) RSb[(Y3 >> 24) & 0xFF] << 24);
+
+    X3 = *RK++ ^ ((uint32_t) RSb[(Y3) &0xFF]) ^
+         ((uint32_t) RSb[(Y2 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) RSb[(Y1 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) RSb[(Y0 >> 24) & 0xFF] << 24);
+  } else /* ENCRYPT */
+  {
+#endif /* AES_DECRYPTION */
+
+    // two rounds per iteration, alternating between the X and Y word sets
+    for (i = (ctx->rounds >> 1) - 1; i > 0; i--) {
+      AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+      AES_FROUND(X0, X1, X2, X3, Y0, Y1, Y2, Y3);
+    }
+
+    AES_FROUND(Y0, Y1, Y2, Y3, X0, X1, X2, X3);
+
+    // last round: byte-wise FSb lookups combined with the final round key
+    X0 = *RK++ ^ ((uint32_t) FSb[(Y0) &0xFF]) ^
+         ((uint32_t) FSb[(Y1 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) FSb[(Y2 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) FSb[(Y3 >> 24) & 0xFF] << 24);
+
+    X1 = *RK++ ^ ((uint32_t) FSb[(Y1) &0xFF]) ^
+         ((uint32_t) FSb[(Y2 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) FSb[(Y3 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) FSb[(Y0 >> 24) & 0xFF] << 24);
+
+    X2 = *RK++ ^ ((uint32_t) FSb[(Y2) &0xFF]) ^
+         ((uint32_t) FSb[(Y3 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) FSb[(Y0 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) FSb[(Y1 >> 24) & 0xFF] << 24);
+
+    X3 = *RK++ ^ ((uint32_t) FSb[(Y3) &0xFF]) ^
+         ((uint32_t) FSb[(Y0 >> 8) & 0xFF] << 8) ^
+         ((uint32_t) FSb[(Y1 >> 16) & 0xFF] << 16) ^
+         ((uint32_t) FSb[(Y2 >> 24) & 0xFF] << 24);
+
+#if AES_DECRYPTION  // whether AES decryption is supported
+  }
+#endif /* AES_DECRYPTION */
+
+  // store the four result words back into the caller's output block
+  PUT_UINT32_LE(X0, output, 0);
+  PUT_UINT32_LE(X1, output, 4);
+  PUT_UINT32_LE(X2, output, 8);
+  PUT_UINT32_LE(X3, output, 12);
+
+  return (0);
+}
+/* end of aes.c */
+/******************************************************************************
+ *
+ * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL
+ *
+ * This is a simple and straightforward implementation of AES-GCM authenticated
+ * encryption. The focus of this work was correctness & accuracy. It is written
+ * in straight 'C' without any particular focus upon optimization or speed. It
+ * should be endian (memory byte order) neutral since the few places that care
+ * are handled explicitly.
+ *
+ * This implementation of AES-GCM was created by Steven M. Gibson of GRC.com.
+ *
+ * It is intended for general purpose use, but was written in support of GRC's
+ * reference implementation of the SQRL (Secure Quick Reliable Login) client.
+ *
+ * See: http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf
+ * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/
+ * gcm/gcm-revised-spec.pdf
+ *
+ * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE
+ * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK.
+ *
+ *******************************************************************************/
+
+
+/******************************************************************************
+ * ==== IMPLEMENTATION WARNING ====
+ *
+ * This code was developed for use within SQRL's fixed environment. Thus, it
+ * is somewhat less "general purpose" than it would be if it were designed as
+ * a general purpose AES-GCM library. Specifically, it bothers with almost NO
+ * error checking on parameter limits, buffer bounds, etc. It assumes that it
+ * is being invoked by its author or by someone who understands the values it
+ * expects to receive. Its behavior will be undefined otherwise.
+ *
+ * All functions that might fail are defined to return 'ints' to indicate a
+ * problem. Most do not do so now. But this allows for error propagation out
+ * of internal functions if robust error checking should ever be desired.
+ *
+ ******************************************************************************/
+
+/* Calculating the "GHASH"
+ *
+ * There are many ways of calculating the so-called GHASH in software, each with
+ * a traditional size vs performance tradeoff. The GHASH (Galois field hash) is
+ * an intriguing construction which takes two 128-bit strings (also the cipher's
+ * block size and the fundamental operation size for the system) and hashes them
+ * into a third 128-bit result.
+ *
+ * Many implementation solutions have been worked out that use large precomputed
+ * table lookups in place of more time consuming bit fiddling, and this approach
+ * can be scaled easily upward or downward as needed to change the time/space
+ * tradeoff. It's been studied extensively and there's a solid body of theory
+ * and practice. For example, without using any lookup tables an implementation
+ * might obtain 119 cycles per byte throughput, whereas using a simple, though
+ * large, key-specific 64 kbyte 8-bit lookup table the performance jumps to 13
+ * cycles per byte.
+ *
+ * And Intel's processors have, since 2010, included an instruction which does
+ * the entire 128x128->128 bit job in just several 64x64->128 bit pieces.
+ *
+ * Since SQRL is interactive, and only processing a few 128-bit blocks, I've
+ * settled upon a relatively slower but appealing small-table compromise which
+ * folds a bunch of not only time consuming but also bit twiddling into a simple
+ * 16-entry table which is attributed to Victor Shoup's 1996 work while at
+ * Bellcore: "On Fast and Provably Secure Message Authentication Based on
+ * Universal Hashing." See: http://www.shoup.net/papers/macs.pdf
+ * See, also section 4.1 of the "gcm-revised-spec" cited above.
+ */
+
+/*
+ * This 16-entry table of pre-computed constants is used by the
+ * GHASH multiplier to improve over a strictly table-free but
+ * significantly slower 128x128 bit multiply within GF(2^128).
+ * It is indexed by the 4 bits shifted out of the low half of the
+ * accumulator, and the entry is XORed into bits 48..63 of the high half.
+ */
+static const uint64_t last4[16] = {
+    0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0,
+    0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0};
+
+/*
+ * Platform Endianness Neutralizing Load and Store Macro definitions
+ * GCM wants platform-neutral Big Endian (BE) byte ordering
+ *
+ * GET_UINT32_BE(n, b, i): n = the 4 bytes b[i..i+3], most significant first.
+ * PUT_UINT32_BE(n, b, i): stores the low 32 bits of n into b[i..i+3], most
+ * significant byte first. Both evaluate 'b' and 'i' more than once, so
+ * arguments must be free of side effects.
+ */
+#define GET_UINT32_BE(n, b, i) \
+  { \
+    (n) = ((uint32_t) (b)[(i)] << 24) | ((uint32_t) (b)[(i) + 1] << 16) | \
+          ((uint32_t) (b)[(i) + 2] << 8) | ((uint32_t) (b)[(i) + 3]); \
+  }
+
+#define PUT_UINT32_BE(n, b, i) \
+  { \
+    (b)[(i)] = (uchar) ((n) >> 24); \
+    (b)[(i) + 1] = (uchar) ((n) >> 16); \
+    (b)[(i) + 2] = (uchar) ((n) >> 8); \
+    (b)[(i) + 3] = (uchar) ((n)); \
+  }
+
+/******************************************************************************
+ *
+ * GCM_INITIALIZE
+ *
+ * Must be called once to initialize the GCM library.
+ *
+ * At present, this only calls the AES keygen table generator, which expands
+ * the AES keying tables for use.
This is NOT A THREAD-SAFE function, so it
+ * MUST be called during system initialization before a multi-threading
+ * environment is running.
+ *
+ * Always returns 0.
+ *
+ ******************************************************************************/
+int gcm_initialize(void) {
+  aes_init_keygen_tables();
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM_MULT
+ *
+ * Performs a GHASH operation on the 128-bit input vector 'x', setting
+ * the 128-bit output vector to 'x' times H using our precomputed tables.
+ * 'x' and 'output' are seen as elements of GCM's GF(2^128) Galois field.
+ *
+ * The input is consumed 4 bits at a time, from the low nibble of x[15] up to
+ * the high nibble of x[0]. 'output' is only written after all of 'x' has been
+ * read, so the callers in this file pass the same buffer for both.
+ *
+ ******************************************************************************/
+static void gcm_mult(gcm_context *ctx,   // pointer to established context
+                     const uchar x[16],  // pointer to 128-bit input vector
+                     uchar output[16])   // pointer to 128-bit output vector
+{
+  int i;
+  uchar lo, hi, rem;
+  uint64_t zh, zl;
+
+  // seed the 128-bit accumulator (zh:zl) from the lowest nibble of x
+  lo = (uchar) (x[15] & 0x0f);
+  hi = (uchar) (x[15] >> 4);  // overwritten in the loop before it is read
+  zh = ctx->HH[lo];
+  zl = ctx->HL[lo];
+
+  for (i = 15; i >= 0; i--) {
+    lo = (uchar) (x[i] & 0x0f);
+    hi = (uchar) (x[i] >> 4);
+
+    // the low nibble of x[15] was already folded in above, so skip it once
+    if (i != 15) {
+      rem = (uchar) (zl & 0x0f);    // the 4 bits about to be shifted out
+      zl = (zh << 60) | (zl >> 4);  // shift the 128-bit value right by 4
+      zh = (zh >> 4);
+      zh ^= (uint64_t) last4[rem] << 48;  // fold the shifted-out bits back in
+      zh ^= ctx->HH[lo];
+      zl ^= ctx->HL[lo];
+    }
+    // same shift-and-fold step for the high nibble of this byte
+    rem = (uchar) (zl & 0x0f);
+    zl = (zh << 60) | (zl >> 4);
+    zh = (zh >> 4);
+    zh ^= (uint64_t) last4[rem] << 48;
+    zh ^= ctx->HH[hi];
+    zl ^= ctx->HL[hi];
+  }
+  // write the accumulator out big-endian, high half first
+  PUT_UINT32_BE(zh >> 32, output, 0);
+  PUT_UINT32_BE(zh, output, 4);
+  PUT_UINT32_BE(zl >> 32, output, 8);
+  PUT_UINT32_BE(zl, output, 12);
+}
+
+/******************************************************************************
+ *
+ * GCM_SETKEY
+ *
+ * This is called to set the AES-GCM key. It initializes the AES key
+ * and populates the gcm context's pre-calculated HTables.
+ *
+ * The whole context is zeroed first, so any previous state is discarded.
+ * Returns 0 on success, or the non-zero result of aes_setkey / aes_cipher.
+ *
+ ******************************************************************************/
+int gcm_setkey(gcm_context *ctx,    // pointer to caller-provided gcm context
+               const uchar *key,    // pointer to the AES encryption key
+               const uint keysize)  // size in bytes (must be 16, 24, 32 for
+                                    // 128, 192 or 256-bit keys respectively)
+{
+  int ret, i, j;
+  uint64_t hi, lo;
+  uint64_t vl, vh;
+  unsigned char h[16];
+
+  memset(ctx, 0, sizeof(gcm_context));  // zero caller-provided GCM context
+  memset(h, 0, 16);                     // initialize the block to encrypt
+
+  // encrypt the null 128-bit block to generate a key-based value
+  // which is then used to initialize our GHASH lookup tables
+  if ((ret = aes_setkey(&ctx->aes_ctx, ENCRYPT, key, keysize)) != 0)
+    return (ret);
+  if ((ret = aes_cipher(&ctx->aes_ctx, h, h)) != 0) return (ret);
+
+  GET_UINT32_BE(hi, h, 0);  // pack h as two 64-bit ints, big-endian
+  GET_UINT32_BE(lo, h, 4);
+  vh = (uint64_t) hi << 32 | lo;
+
+  GET_UINT32_BE(hi, h, 8);
+  GET_UINT32_BE(lo, h, 12);
+  vl = (uint64_t) hi << 32 | lo;
+
+  ctx->HL[8] = vl;  // 8 = 1000 corresponds to 1 in GF(2^128)
+  ctx->HH[8] = vh;
+  ctx->HH[0] = 0;  // 0 corresponds to 0 in GF(2^128)
+  ctx->HL[0] = 0;
+
+  // fill entries 4, 2 and 1: each is the previous value shifted right by one
+  // bit, with 0xe1 folded into the top byte whenever a 1 bit was shifted out
+  for (i = 4; i > 0; i >>= 1) {
+    uint32_t T = (uint32_t) (vl & 1) * 0xe1000000U;
+    vl = (vh << 63) | (vl >> 1);
+    vh = (vh >> 1) ^ ((uint64_t) T << 32);
+    ctx->HL[i] = vl;
+    ctx->HH[i] = vh;
+  }
+  // fill the remaining entries: entry i+j is the XOR of entries i and j,
+  // for i = 2, 4, 8 and 0 < j < i
+  for (i = 2; i < 16; i <<= 1) {
+    uint64_t *HiL = ctx->HL + i, *HiH = ctx->HH + i;
+    vh = *HiH;
+    vl = *HiL;
+    for (j = 1; j < i; j++) {
+      HiH[j] = vh ^ ctx->HH[j];
+      HiL[j] = vl ^ ctx->HL[j];
+    }
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM processing occurs in four phases: SETKEY, START, UPDATE and FINISH.
+ *
+ * SETKEY: Expands the AES key and precomputes the GHASH lookup tables.
+ *
+ * START: Sets the Encryption/Decryption mode.
+ * Accepts the initialization vector and additional data.
+ *
+ * UPDATE: Encrypts or decrypts the plaintext or ciphertext.
+ *
+ * FINISH: Performs a final GHASH to generate the authentication tag.
+ *
+ ******************************************************************************
+ *
+ * GCM_START
+ *
+ * Given a user-provided GCM context, this initializes it, sets the encryption
+ * mode, and preprocesses the initialization vector and additional AEAD data.
+ *
+ * Returns 0 on success, or the non-zero result of aes_cipher.
+ *
+ ******************************************************************************/
+int gcm_start(gcm_context *ctx,  // pointer to user-provided GCM context
+              int mode,          // ENCRYPT (1) or DECRYPT (0)
+              const uchar *iv,   // pointer to initialization vector
+              size_t iv_len,     // IV length in bytes (should == 12)
+              const uchar *add,  // ptr to additional AEAD data (NULL if none)
+              size_t add_len)    // length of additional AEAD data (bytes)
+{
+  int ret;             // our error return if the AES encrypt fails
+  uchar work_buf[16];  // length block, used only when the IV len != 12
+  const uchar *p;      // general purpose array pointer
+  size_t use_len;      // byte count to process, up to 16 bytes
+  size_t i;            // local loop iterator
+
+  // since the context might be reused under the same key
+  // we zero the working buffers for this next new process
+  memset(ctx->y, 0x00, sizeof(ctx->y));
+  memset(ctx->buf, 0x00, sizeof(ctx->buf));
+  ctx->len = 0;
+  ctx->add_len = 0;
+
+  ctx->mode = mode;             // set the GCM encryption/decryption mode
+  ctx->aes_ctx.mode = ENCRYPT;  // GCM *always* runs AES in ENCRYPTION mode
+
+  if (iv_len == 12) {            // GCM natively uses a 12-byte, 96-bit IV
+    memcpy(ctx->y, iv, iv_len);  // copy the IV to the top of the 'y' buff
+    ctx->y[15] = 1;              // start "counting" from 1 (not 0)
+  } else  // if we don't have a 12-byte IV, we GHASH whatever we've been given
+  {
+    memset(work_buf, 0x00, 16);               // clear the working buffer
+    PUT_UINT32_BE(iv_len * 8, work_buf, 12);  // low 32 bits of IV bit length
+
+    // GHASH the IV in 16-byte pieces; a short final piece is zero-padded
+    // implicitly because only 'use_len' bytes are XORed in
+    p = iv;
+    while (iv_len > 0) {
+      use_len = (iv_len < 16) ? iv_len : 16;
+      for (i = 0; i < use_len; i++) ctx->y[i] ^= p[i];
+      gcm_mult(ctx, ctx->y, ctx->y);
+      iv_len -= use_len;
+      p += use_len;
+    }
+    // finish with the length block
+    for (i = 0; i < 16; i++) ctx->y[i] ^= work_buf[i];
+    gcm_mult(ctx, ctx->y, ctx->y);
+  }
+  // encrypt the initial counter block; gcm_finish XORs this into the tag
+  if ((ret = aes_cipher(&ctx->aes_ctx, ctx->y, ctx->base_ectr)) != 0)
+    return (ret);
+
+  // fold the additional data into the running hash in ctx->buf
+  ctx->add_len = add_len;
+  p = add;
+  while (add_len > 0) {
+    use_len = (add_len < 16) ? add_len : 16;
+    for (i = 0; i < use_len; i++) ctx->buf[i] ^= p[i];
+    gcm_mult(ctx, ctx->buf, ctx->buf);
+    add_len -= use_len;
+    p += use_len;
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM_UPDATE
+ *
+ * This is called once or more to process bulk plaintext or ciphertext data.
+ * We give this some number of bytes of input and it returns the same number
+ * of output bytes. If called multiple times (which is fine) all but the final
+ * invocation MUST be called with length mod 16 == 0. (Only the final call can
+ * have a partial block length of < 128 bits.)
+ *
+ * Returns 0 on success, or the non-zero result of aes_cipher.
+ *
+ ******************************************************************************/
+int gcm_update(gcm_context *ctx,    // pointer to user-provided GCM context
+               size_t length,       // length, in bytes, of data to process
+               const uchar *input,  // pointer to source data
+               uchar *output)       // pointer to destination data
+{
+  int ret;         // our error return if the AES encrypt fails
+  uchar ectr[16];  // counter-mode cipher output for XORing
+  size_t use_len;  // byte count to process, up to 16 bytes
+  size_t i;        // local loop iterator
+
+  ctx->len += length;  // bump the GCM context's running length count
+
+  while (length > 0) {
+    // clamp the length to process at 16 bytes
+    use_len = (length < 16) ? length : 16;
+
+    // increment the context's 128-bit IV||Counter 'y' vector
+    // (only the last 4 bytes count, big-endian, with carry)
+    for (i = 16; i > 12; i--)
+      if (++ctx->y[i - 1] != 0) break;
+
+    // encrypt the context's 'y' vector under the established key
+    if ((ret = aes_cipher(&ctx->aes_ctx, ctx->y, ectr)) != 0) return (ret);
+
+    // encrypt or decrypt the input to the output
+    if (ctx->mode == ENCRYPT) {
+      for (i = 0; i < use_len; i++) {
+        // XOR the cipher's output vector (ectr) with our input
+        output[i] = (uchar) (ectr[i] ^ input[i]);
+        // now we mix in our data into the authentication hash.
+        // if we're ENcrypting we XOR in the post-XOR (output)
+        // results, but if we're DEcrypting we XOR in the input
+        // data
+        ctx->buf[i] ^= output[i];
+      }
+    } else {
+      for (i = 0; i < use_len; i++) {
+        // but if we're DEcrypting we XOR in the input data first,
+        // i.e. before saving to output data, otherwise if the input
+        // and output buffer are the same (inplace decryption) we
+        // would not get the correct auth tag
+
+        ctx->buf[i] ^= input[i];
+
+        // XOR the cipher's output vector (ectr) with our input
+        output[i] = (uchar) (ectr[i] ^ input[i]);
+      }
+    }
+    gcm_mult(ctx, ctx->buf, ctx->buf);  // perform a GHASH operation
+
+    length -= use_len;  // drop the remaining byte count to process
+    input += use_len;   // bump our input pointer forward
+    output += use_len;  // bump our output pointer forward
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM_FINISH
+ *
+ * This is called once after all calls to GCM_UPDATE to finalize the GCM.
+ * It performs the final GHASH to produce the resulting authentication TAG.
+ *
+ * The tag is built from 16-byte blocks, so at most 16 tag bytes exist.
+ * Returns 0 on success, or -1 when 'tag_len' is larger than 16.
+ *
+ ******************************************************************************/
+int gcm_finish(gcm_context *ctx,  // pointer to user-provided GCM context
+               uchar *tag,        // pointer to buffer which receives the tag
+               size_t tag_len)    // length, in bytes, of the tag-receiving buf
+{
+  uchar work_buf[16];
+  uint64_t orig_len = ctx->len * 8;          // cipher data length in bits
+  uint64_t orig_add_len = ctx->add_len * 8;  // additional data length in bits
+  size_t i;
+
+  // a longer tag would read past the end of ctx->base_ectr and ctx->buf
+  if (tag_len > sizeof(ctx->base_ectr)) return (-1);
+
+  // the tag starts out as the encrypted initial counter block
+  if (tag_len != 0) memcpy(tag, ctx->base_ectr, tag_len);
+
+  if (orig_len || orig_add_len) {
+    memset(work_buf, 0x00, 16);
+
+    // final GHASH block: both bit lengths, 64 bits each, big-endian
+    PUT_UINT32_BE((orig_add_len >> 32), work_buf, 0);
+    PUT_UINT32_BE((orig_add_len), work_buf, 4);
+    PUT_UINT32_BE((orig_len >> 32), work_buf, 8);
+    PUT_UINT32_BE((orig_len), work_buf, 12);
+
+    for (i = 0; i < 16; i++) ctx->buf[i] ^= work_buf[i];
+    gcm_mult(ctx, ctx->buf, ctx->buf);
+    // mix the finished hash into the tag
+    for (i = 0; i < tag_len; i++) tag[i] ^= ctx->buf[i];
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM_CRYPT_AND_TAG
+ *
+ * This either encrypts or decrypts the user-provided data and, either
+ * way, generates an authentication tag of the requested length. It must be
+ * called with a GCM context whose key has already been set with GCM_SETKEY.
+ *
+ * The user would typically call this explicitly to ENCRYPT a buffer of data
+ * and optional associated data, and produce its authentication tag.
+ *
+ * To reverse the process the user would typically call the companion
+ * GCM_AUTH_DECRYPT function to decrypt data and verify a user-provided
+ * authentication tag. The GCM_AUTH_DECRYPT function calls this function
+ * to perform its decryption and tag generation, which it then compares.
+ *
+ * Returns 0 on success, or the first non-zero result reported by
+ * gcm_start, gcm_update or gcm_finish.
+ *
+ ******************************************************************************/
+int gcm_crypt_and_tag(
+    gcm_context *ctx,    // gcm context with key already setup
+    int mode,            // cipher direction: ENCRYPT (1) or DECRYPT (0)
+    const uchar *iv,     // pointer to the 12-byte initialization vector
+    size_t iv_len,       // byte length of the IV. should always be 12
+    const uchar *add,    // pointer to the non-ciphered additional data
+    size_t add_len,      // byte length of the additional AEAD data
+    const uchar *input,  // pointer to the cipher data source
+    uchar *output,       // pointer to the cipher data destination
+    size_t length,       // byte length of the cipher data
+    uchar *tag,          // pointer to the tag to be generated
+    size_t tag_len)      // byte length of the tag to be generated
+{
+  int ret;
+  /*
+     assuming that the caller has already invoked gcm_setkey to
+     prepare the gcm context with the keying material, we simply
+     invoke each of the three GCM sub-functions in turn, stopping
+     at the first one that reports a failure
+  */
+  if ((ret = gcm_start(ctx, mode, iv, iv_len, add, add_len)) != 0)
+    return (ret);
+  if ((ret = gcm_update(ctx, length, input, output)) != 0) return (ret);
+  return (gcm_finish(ctx, tag, tag_len));
+}
+
+/******************************************************************************
+ *
+ * GCM_AUTH_DECRYPT
+ *
+ * This DECRYPTS a user-provided data buffer with optional associated data.
+ * It then verifies a user-supplied authentication tag against the tag just
+ * re-created during decryption to verify that the data has not been altered.
+ *
+ * This function calls GCM_CRYPT_AND_TAG (above) to perform the decryption
+ * and authentication tag generation.
+ *
+ * Returns 0 when the tag matches. Returns GCM_AUTH_FAILURE when the tag does
+ * not match or 'tag_len' is larger than 16, and the non-zero result of
+ * GCM_CRYPT_AND_TAG when decryption itself fails. On tag mismatch or
+ * decryption failure 'output' is zeroed.
+ *
+ ******************************************************************************/
+int gcm_auth_decrypt(
+    gcm_context *ctx,    // gcm context with key already setup
+    const uchar *iv,     // pointer to the 12-byte initialization vector
+    size_t iv_len,       // byte length of the IV. should always be 12
+    const uchar *add,    // pointer to the non-ciphered additional data
+    size_t add_len,      // byte length of the additional AEAD data
+    const uchar *input,  // pointer to the cipher data source
+    uchar *output,       // pointer to the cipher data destination
+    size_t length,       // byte length of the cipher data
+    const uchar *tag,    // pointer to the tag to be authenticated
+    size_t tag_len)      // byte length of the tag <= 16
+{
+  uchar check_tag[16];  // the tag generated and returned by decryption
+  int diff;             // an ORed flag to detect authentication errors
+  int ret;              // result of the decryption pass
+  size_t i;             // our local iterator
+
+  // check_tag holds 16 bytes; a longer tag would overflow it and can
+  // never be valid, so refuse it before touching any buffer
+  if (tag_len > sizeof(check_tag)) return (GCM_AUTH_FAILURE);
+
+  /*
+     we use GCM_CRYPT_AND_TAG (above) to perform our decryption
+     (which is an identical XORing to reverse the previous one)
+     and also to re-generate the matching authentication tag
+  */
+  ret = gcm_crypt_and_tag(ctx, DECRYPT, iv, iv_len, add, add_len, input, output,
+                          length, check_tag, tag_len);
+  if (ret != 0) {
+    memset(output, 0, length);  // never hand back unauthenticated data
+    return (ret);
+  }
+
+  // now we verify the authentication tag in 'constant time'
+  for (diff = 0, i = 0; i < tag_len; i++) diff |= tag[i] ^ check_tag[i];
+
+  if (diff != 0) {              // see whether any bits differed?
+    memset(output, 0, length);  // if so... wipe the output data
+    return (GCM_AUTH_FAILURE);  // return GCM_AUTH_FAILURE
+  }
+  return (0);
+}
+
+/******************************************************************************
+ *
+ * GCM_ZERO_CTX
+ *
+ * The GCM context contains both the GCM context and the AES context.
+ * This includes keying and key-related material which is security-
+ * sensitive, so it MUST be zeroed after use. This function does that.
+ *
+ ******************************************************************************/
+void gcm_zero_ctx(gcm_context *ctx) {
+  // zero the context originally provided to us. The stores go through a
+  // volatile pointer so the compiler cannot discard the wipe as a dead
+  // store when the context is not used again afterwards
+  volatile uchar *p = (volatile uchar *) ctx;
+  size_t n = sizeof(gcm_context);
+  while (n-- > 0) *p++ = 0;
+}
+//
+// aes-gcm.c
+// Pods
+//
+// Created by Markus Kosmal on 20/11/14.
+//
+//
+
+// One-shot AES-GCM encryption: sets the key, encrypts 'input_length' bytes
+// from 'input' into 'output', writes 'tag_len' tag bytes to 'tag', and wipes
+// the key material before returning.
+// Returns 0 on success, or the non-zero result of gcm_setkey (for example an
+// unsupported 'key_len') or of gcm_crypt_and_tag.
+int aes_gcm_encrypt(unsigned char *output,  //
+                    const unsigned char *input, size_t input_length,
+                    const unsigned char *key, const size_t key_len,
+                    const unsigned char *iv, const size_t iv_len,
+                    unsigned char *aead, size_t aead_len, unsigned char *tag,
+                    const size_t tag_len) {
+  int ret;          // our return value
+  gcm_context ctx;  // includes the AES context structure
+
+  // do not run the cipher on a context whose key setup failed: its round
+  // key pointer may never have been set
+  ret = gcm_setkey(&ctx, key, (const uint) key_len);
+  if (ret == 0) {
+    ret = gcm_crypt_and_tag(&ctx, ENCRYPT, iv, iv_len, aead, aead_len, input,
+                            output, input_length, tag, tag_len);
+  }
+
+  gcm_zero_ctx(&ctx);  // wipe key material on every path
+
+  return (ret);
+}
+
+// One-shot AES-GCM decryption without additional data.
+// NOTE: no authentication tag is computed or checked here (a NULL tag buffer
+// of length 0 is passed down), so the output is NOT authenticated; callers
+// that need integrity must use gcm_auth_decrypt instead.
+// Returns 0 on success, or the non-zero result of gcm_setkey (for example an
+// unsupported 'key_len') or of gcm_crypt_and_tag.
+int aes_gcm_decrypt(unsigned char *output, const unsigned char *input,
+                    size_t input_length, const unsigned char *key,
+                    const size_t key_len, const unsigned char *iv,
+                    const size_t iv_len) {
+  int ret;          // our return value
+  gcm_context ctx;  // includes the AES context structure
+
+  size_t tag_len = 0;
+  unsigned char *tag_buf = NULL;
+
+  // do not run the cipher on a context whose key setup failed
+  ret = gcm_setkey(&ctx, key, (const uint) key_len);
+  if (ret == 0) {
+    ret = gcm_crypt_and_tag(&ctx, DECRYPT, iv, iv_len, NULL, 0, input, output,
+                            input_length, tag_buf, tag_len);
+  }
+
+  gcm_zero_ctx(&ctx);  // wipe key material on every path
+
+  return (ret);
+}
+#endif
diff --git a/src/tls_aes128.h b/src/tls_aes128.h
new file mode 100644
index 00000000..d136fc78
--- /dev/null
+++ b/src/tls_aes128.h
@@ -0,0 +1,263 @@
+/******************************************************************************
+ *
+ * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL
+ *
+ * This is a simple and straightforward implementation of the AES Rijndael
+ * 128-bit block cipher designed by Vincent Rijmen and Joan Daemen. The focus
+ * of this work was correctness & accuracy. It is written in 'C' without any
+ * particular focus upon optimization or speed. It should be endian (memory
+ * byte order) neutral since the few places that care are handled explicitly.
+ *
+ * This implementation of Rijndael was created by Steven M. Gibson of GRC.com.
+ *
+ * It is intended for general purpose use, but was written in support of GRC's
+ * reference implementation of the SQRL (Secure Quick Reliable Login) client.
+ *
+ * See: http://csrc.nist.gov/archive/aes/rijndael/wsdindex.html
+ *
+ * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE
+ * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK.
+ *
+ *******************************************************************************/
+
+#ifndef AES_HEADER
+#define AES_HEADER
+
+/******************************************************************************/
+#define AES_DECRYPTION 1  // whether AES decryption is supported
+/******************************************************************************/
+
+#define ENCRYPT 1  // specify whether we're encrypting
+#define DECRYPT 0  // or decrypting
+
+#include "arch.h"
+
+typedef unsigned char uchar;  // add some convenient shorter types
+typedef unsigned int uint;
+
+/******************************************************************************
+ * AES_INIT_KEYGEN_TABLES : MUST be called once before any AES use
+ ******************************************************************************/
+void aes_init_keygen_tables(void);
+
+/******************************************************************************
+ * AES_CONTEXT : cipher context / holds inter-call data
+ ******************************************************************************/
+typedef struct {
+  int mode;          // 1 for Encryption, 0 for Decryption
+  int rounds;        // keysize-based rounds count
+  uint32_t *rk;      // pointer to current round key
+  uint32_t buf[68];  // key expansion buffer
+} aes_context;
+
+/******************************************************************************
+ * AES_SETKEY : called to expand the key for encryption or decryption
+ ******************************************************************************/
+int aes_setkey(aes_context *ctx,  // pointer to context
+               int mode,          // 1 or 0 for Encrypt/Decrypt
+               const uchar *key,  // AES input key
+               uint keysize);     // size in bytes (must be 16, 24, 32 for
+                                  // 128, 192 or 256-bit keys respectively)
+                                  // returns 0 for success
+
+/******************************************************************************
+ * AES_CIPHER : called to encrypt or decrypt ONE 128-bit block of data
+ ******************************************************************************/
+int aes_cipher(aes_context *ctx,       // pointer to context
+               const uchar input[16],  // 128-bit block to en/decipher
+               uchar output[16]);      // 128-bit output result block
+                                       // returns 0 for success
+
+#endif /* AES_HEADER */
+/******************************************************************************
+ *
+ * THIS SOURCE CODE IS HEREBY PLACED INTO THE PUBLIC DOMAIN FOR THE GOOD OF ALL
+ *
+ * This is a simple and straightforward implementation of AES-GCM authenticated
+ * encryption. The focus of this work was correctness & accuracy. It is written
+ * in straight 'C' without any particular focus upon optimization or speed. It
+ * should be endian (memory byte order) neutral since the few places that care
+ * are handled explicitly.
+ *
+ * This implementation of AES-GCM was created by Steven M. Gibson of GRC.com.
+ *
+ * It is intended for general purpose use, but was written in support of GRC's
+ * reference implementation of the SQRL (Secure Quick Reliable Login) client.
+ *
+ * See: http://csrc.nist.gov/publications/nistpubs/800-38D/SP-800-38D.pdf
+ * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ \
+ * gcm/gcm-revised-spec.pdf
+ *
+ * NO COPYRIGHT IS CLAIMED IN THIS WORK, HOWEVER, NEITHER IS ANY WARRANTY MADE
+ * REGARDING ITS FITNESS FOR ANY PARTICULAR PURPOSE. USE IT AT YOUR OWN RISK.
+ *
+ *******************************************************************************/
+#ifndef GCM_HEADER
+#define GCM_HEADER
+
+#include "arch.h"
+#define GCM_AUTH_FAILURE 0x55555555  // authentication failure
+
+/******************************************************************************
+ * GCM_CONTEXT : GCM context / holds keytables, instance data, and AES ctx
+ ******************************************************************************/
+typedef struct {
+  int mode;             // cipher direction: encrypt/decrypt
+  uint64_t len;         // cipher data length processed so far
+  uint64_t add_len;     // total add data length
+  uint64_t HL[16];      // precalculated lo-half HTable
+  uint64_t HH[16];      // precalculated hi-half HTable
+  uchar base_ectr[16];  // first counter-mode cipher output for tag
+  uchar y[16];          // the current cipher-input IV|Counter value
+  uchar buf[16];        // buf working value
+  aes_context aes_ctx;  // cipher context used
+} gcm_context;
+
+/******************************************************************************
+ * GCM_INITIALIZE : MUST be called once before ANY use of this library
+ ******************************************************************************/
+int gcm_initialize(void);
+
+/******************************************************************************
+ * GCM_SETKEY : sets the GCM (and AES) keying material for use
+ ******************************************************************************/
+int gcm_setkey(gcm_context *ctx,   // caller-provided context ptr
+               const uchar *key,   // pointer to cipher key
+               const uint keysize  // size in bytes (must be 16, 24, 32 for
+                                   // 128, 192 or 256-bit keys respectively)
+);  // returns 0 for success
+
+/******************************************************************************
+ *
+ * GCM_CRYPT_AND_TAG
+ *
+ * This either encrypts or decrypts the user-provided data and, either
+ * way, generates an authentication tag of the requested length.
It must be
+ * called with a GCM context whose key has already been set with GCM_SETKEY.
+ *
+ * The user would typically call this explicitly to ENCRYPT a buffer of data
+ * and optional associated data, and produce its authentication tag.
+ *
+ * To reverse the process the user would typically call the companion
+ * GCM_AUTH_DECRYPT function to decrypt data and verify a user-provided
+ * authentication tag. The GCM_AUTH_DECRYPT function calls this function
+ * to perform its decryption and tag generation, which it then compares.
+ *
+ ******************************************************************************/
+int gcm_crypt_and_tag(
+    gcm_context *ctx,    // gcm context with key already setup
+    int mode,            // cipher direction: ENCRYPT (1) or DECRYPT (0)
+    const uchar *iv,     // pointer to the 12-byte initialization vector
+    size_t iv_len,       // byte length of the IV. should always be 12
+    const uchar *add,    // pointer to the non-ciphered additional data
+    size_t add_len,      // byte length of the additional AEAD data
+    const uchar *input,  // pointer to the cipher data source
+    uchar *output,       // pointer to the cipher data destination
+    size_t length,       // byte length of the cipher data
+    uchar *tag,          // pointer to the tag to be generated
+    size_t tag_len);     // byte length of the tag to be generated
+
+/******************************************************************************
+ *
+ * GCM_AUTH_DECRYPT
+ *
+ * This DECRYPTS a user-provided data buffer with optional associated data.
+ * It then verifies a user-supplied authentication tag against the tag just
+ * re-created during decryption to verify that the data has not been altered.
+ *
+ * This function calls GCM_CRYPT_AND_TAG (above) to perform the decryption
+ * and authentication tag generation.
+ * + ******************************************************************************/ +int gcm_auth_decrypt( + gcm_context *ctx, // gcm context with key already setup + const uchar *iv, // pointer to the 12-byte initialization vector + size_t iv_len, // byte length if the IV. should always be 12 + const uchar *add, // pointer to the non-ciphered additional data + size_t add_len, // byte length of the additional AEAD data + const uchar *input, // pointer to the cipher data source + uchar *output, // pointer to the cipher data destination + size_t length, // byte length of the cipher data + const uchar *tag, // pointer to the tag to be authenticated + size_t tag_len); // byte length of the tag <= 16 + +/****************************************************************************** + * + * GCM_START + * + * Given a user-provided GCM context, this initializes it, sets the encryption + * mode, and preprocesses the initialization vector and additional AEAD data. + * + ******************************************************************************/ +int gcm_start( + gcm_context *ctx, // pointer to user-provided GCM context + int mode, // ENCRYPT (1) or DECRYPT (0) + const uchar *iv, // pointer to initialization vector + size_t iv_len, // IV length in bytes (should == 12) + const uchar *add, // pointer to additional AEAD data (NULL if none) + size_t add_len); // length of additional AEAD data (bytes) + +/****************************************************************************** + * + * GCM_UPDATE + * + * This is called once or more to process bulk plaintext or ciphertext data. + * We give this some number of bytes of input and it returns the same number + * of output bytes. If called multiple times (which is fine) all but the final + * invocation MUST be called with length mod 16 == 0. (Only the final call can + * have a partial block length of < 128 bits.) 
+ * + ******************************************************************************/ +int gcm_update(gcm_context *ctx, // pointer to user-provided GCM context + size_t length, // length, in bytes, of data to process + const uchar *input, // pointer to source data + uchar *output); // pointer to destination data + +/****************************************************************************** + * + * GCM_FINISH + * + * This is called once after all calls to GCM_UPDATE to finalize the GCM. + * It performs the final GHASH to produce the resulting authentication TAG. + * + ******************************************************************************/ +int gcm_finish(gcm_context *ctx, // pointer to user-provided GCM context + uchar *tag, // ptr to tag buffer - NULL if tag_len = 0 + size_t tag_len); // length, in bytes, of the tag-receiving buf + +/****************************************************************************** + * + * GCM_ZERO_CTX + * + * The GCM context contains both the GCM context and the AES context. + * This includes keying and key-related material which is security- + * sensitive, so it MUST be zeroed after use. This function does that. + * + ******************************************************************************/ +void gcm_zero_ctx(gcm_context *ctx); + +#endif /* GCM_HEADER */ +// +// aes-gcm.h +// MKo +// +// Created by Markus Kosmal on 20/11/14. 
+// +// + +#ifndef mko_aes_gcm_h +#define mko_aes_gcm_h + +int aes_gcm_encrypt(unsigned char *output, const unsigned char *input, + size_t input_length, const unsigned char *key, + const size_t key_len, const unsigned char *iv, + const size_t iv_len, unsigned char *aead, size_t aead_len, + unsigned char *tag, const size_t tag_len); + +int aes_gcm_decrypt(unsigned char *output, const unsigned char *input, + size_t input_length, const unsigned char *key, + const size_t key_len, const unsigned char *iv, + const size_t iv_len); + +#endif + diff --git a/src/tls_builtin.c b/src/tls_builtin.c index 4338962c..859fea65 100644 --- a/src/tls_builtin.c +++ b/src/tls_builtin.c @@ -1,162 +1,916 @@ #include "tls.h" #if MG_TLS == MG_TLS_BUILTIN -struct tls_data { - uint8_t client_random[32]; // From client hello - uint8_t client_pub[32]; // From client hello + +/* handshake is re-entrant, so we need to keep track of its state */ +enum mg_tls_hs_state { + MG_TLS_HS_CLIENT_HELLO, // first, wait for ClientHello + MG_TLS_HS_SERVER_HELLO, // then, send all server handshake data at once + MG_TLS_HS_CLIENT_CHANGE_CIPHER, // finally wait for ClientChangeCipher + MG_TLS_HS_CLIENT_FINISH, // and ClientFinish (encrypted) + MG_TLS_HS_DONE, // finish handshake, start application data flow }; -struct tls_ctx { - struct mg_iobuf server_cert; // Decoded server certificate - struct mg_iobuf server_key; // Decoded server private key + +/* per-connection TLS data */ +struct tls_data { + enum mg_tls_hs_state state; /* keep track of connection handshake progress */ + + struct mg_iobuf send; + struct mg_iobuf recv; + + mg_sha256_ctx sha256; /* incremental SHA-256 hash for TLS handshake */ + + uint32_t sseq; /* server sequence number, used in encryption */ + uint32_t cseq; /* client sequence number, used in decryption */ + + uint8_t session_id[32]; /* client session ID between the handshake states */ + uint8_t x25519_cli[32]; /* client X25519 key between the handshake states */ + uint8_t x25519_sec[32]; 
/* x25519 secret between the handshake + states */ + + struct mg_str server_cert_der; /* server certificate in DER format */ + uint8_t server_key[32]; /* server EC private key */ + + /* keys for AES encryption */ + uint8_t handshake_secret[32]; + uint8_t server_write_key[16]; + uint8_t server_write_iv[12]; + uint8_t server_finished_key[32]; + uint8_t client_write_key[16]; + uint8_t client_write_iv[12]; + uint8_t client_finished_key[32]; }; #define MG_LOAD_BE16(p) ((uint16_t) ((MG_U8P(p)[0] << 8U) | MG_U8P(p)[1])) #define TLS_HDR_SIZE 5 // 1 byte type, 2 bytes version, 2 bytes len -static inline bool mg_is_big_endian(void) { - int v = 1; - return *(unsigned char *) &v == 1; -} -static inline uint16_t mg_swap16(uint16_t v) { - return (uint16_t) ((v << 8U) | (v >> 8U)); -} -static inline uint16_t mg_be16(uint16_t v) { - return mg_is_big_endian() ? mg_swap16(v) : v; -} -#if 0 -static inline uint32_t mg_swap32(uint32_t v) { - return (v >> 24) | (v >> 8 & 0xff00) | (v << 8 & 0xff0000) | (v << 24); -} -static inline uint64_t mg_swap64(uint64_t v) { - return (((uint64_t) mg_swap32((uint32_t) v)) << 32) | - mg_swap32((uint32_t) (v >> 32)); -} -static inline uint32_t mg_be32(uint32_t v) { - return mg_is_big_endian() ? 
mg_swap32(v) : v; -} -#endif +// for derived tls keys we need SHA256([0]*32) +static uint8_t zeros[32] = {0}; +static uint8_t zeros_sha256_digest[32] = + "\xe3\xb0\xc4\x42\x98\xfc\x1c\x14\x9a\xfb\xf4\xc8\x99\x6f\xb9\x24" + "\x27\xae\x41\xe4\x64\x9b\x93\x4c\xa4\x95\x99\x1b\x78\x52\xb8\x55"; -static inline void add8(struct mg_iobuf *io, uint8_t data) { - mg_iobuf_add(io, io->len, &data, sizeof(data)); +#define X25519_BYTES 32 +const uint8_t X25519_BASE_POINT[X25519_BYTES] = {9}; + +#define X25519_WBITS 32 + +typedef uint32_t limb_t; +typedef uint64_t dlimb_t; +typedef int64_t sdlimb_t; +#define LIMB(x) (uint32_t)(x##ull), (uint32_t) ((x##ull) >> 32) + +#define NLIMBS (256 / X25519_WBITS) +typedef limb_t fe[NLIMBS]; + +static limb_t umaal(limb_t *carry, limb_t acc, limb_t mand, limb_t mier) { + dlimb_t tmp = (dlimb_t) mand * mier + acc + *carry; + *carry = (limb_t) (tmp >> X25519_WBITS); + return (limb_t) tmp; } -static inline void add16(struct mg_iobuf *io, uint16_t data) { - data = mg_htons(data); - mg_iobuf_add(io, io->len, &data, sizeof(data)); + +/* These functions are implemented in terms of umaal on ARM */ +static limb_t adc(limb_t *carry, limb_t acc, limb_t mand) { + dlimb_t total = (dlimb_t) *carry + acc + mand; + *carry = (limb_t) (total >> X25519_WBITS); + return (limb_t) total; } -static inline void add32(struct mg_iobuf *io, uint32_t data) { - data = mg_htonl(data); - mg_iobuf_add(io, io->len, &data, sizeof(data)); + +static limb_t adc0(limb_t *carry, limb_t acc) { + dlimb_t total = (dlimb_t) *carry + acc; + *carry = (limb_t) (total >> X25519_WBITS); + return (limb_t) total; +} + +/* Precondition: carry is small. + * Invariant: result of propagate is < 2^255 + 1 word + * In particular, always less than 2p. 
+ * Also, output x >= min(x,19) + */ +static void propagate(fe x, limb_t over) { + unsigned i; + limb_t carry; + over = x[NLIMBS - 1] >> (X25519_WBITS - 1) | over << 1; + x[NLIMBS - 1] &= ~((limb_t) 1 << (X25519_WBITS - 1)); + + carry = over * 19; + for (i = 0; i < NLIMBS; i++) { + x[i] = adc0(&carry, x[i]); + } +} + +static void add(fe out, const fe a, const fe b) { + unsigned i; + limb_t carry = 0; + for (i = 0; i < NLIMBS; i++) { + out[i] = adc(&carry, a[i], b[i]); + } + propagate(out, carry); +} + +static void sub(fe out, const fe a, const fe b) { + unsigned i; + sdlimb_t carry = -38; + for (i = 0; i < NLIMBS; i++) { + carry = carry + a[i] - b[i]; + out[i] = (limb_t) carry; + carry >>= X25519_WBITS; + } + propagate(out, (limb_t) (1 + carry)); +} + +static void mul(fe out, const fe a, const fe b, unsigned nb) { + limb_t accum[2 * NLIMBS] = {0}; + unsigned i, j; + + limb_t carry2; + for (i = 0; i < nb; i++) { + limb_t mand = b[i]; + carry2 = 0; + for (j = 0; j < NLIMBS; j++) { + accum[i + j] = umaal(&carry2, accum[i + j], mand, a[j]); + } + accum[i + j] = carry2; + } + + carry2 = 0; + for (j = 0; j < NLIMBS; j++) { + out[j] = umaal(&carry2, accum[j], 38, accum[j + NLIMBS]); + } + propagate(out, carry2); +} + +static void sqr(fe out, const fe a) { + mul(out, a, a, NLIMBS); +} +static void mul1(fe out, const fe a) { + mul(out, a, out, NLIMBS); +} +static void sqr1(fe a) { + mul1(a, a); +} + +static void condswap(limb_t a[2 * NLIMBS], limb_t b[2 * NLIMBS], + limb_t doswap) { + unsigned i; + for (i = 0; i < 2 * NLIMBS; i++) { + limb_t xor = (a[i] ^ b[i]) & doswap; + a[i] ^= xor; + b[i] ^= xor; + } +} + +static limb_t canon(fe x) { + /* Canonicalize a field element x, reducing it to the least residue + * which is congruent to it mod 2^255-19. + * + * Precondition: x < 2^255 + 1 word + */ + + /* First, add 19. 
*/ + unsigned i; + limb_t carry0 = 19; + limb_t res; + sdlimb_t carry; + for (i = 0; i < NLIMBS; i++) { + x[i] = adc0(&carry0, x[i]); + } + propagate(x, carry0); + + /* Here, 19 <= x2 < 2^255 + * + * This is because we added 19, so before propagate it can't be less than 19. + * After propagate, it still can't be less than 19, because if propagate does + * anything it adds 19. + * + * We know that the high bit must be clear, because either the input was + * ~ 2^255 + one word + 19 (in which case it propagates to at most 2 words) + * or it was < 2^255. + * + * So now, if we subtract 19, we will get back to something in [0,2^255-19). + */ + carry = -19; + res = 0; + for (i = 0; i < NLIMBS; i++) { + carry += x[i]; + res |= x[i] = (limb_t) carry; + carry >>= X25519_WBITS; + } + return (limb_t) (((dlimb_t) res - 1) >> X25519_WBITS); +} + +static const limb_t a24[1] = {121665}; + +static void ladder_part1(fe xs[5]) { + limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4]; + add(t1, x2, z2); // t1 = A + sub(z2, x2, z2); // z2 = B + add(x2, x3, z3); // x2 = C + sub(z3, x3, z3); // z3 = D + mul1(z3, t1); // z3 = DA + mul1(x2, z2); // x3 = BC + add(x3, z3, x2); // x3 = DA+CB + sub(z3, z3, x2); // z3 = DA-CB + sqr1(t1); // t1 = AA + sqr1(z2); // z2 = BB + sub(x2, t1, z2); // x2 = E = AA-BB + mul(z2, x2, a24, sizeof(a24) / sizeof(a24[0])); // z2 = E*a24 + add(z2, z2, t1); // z2 = E*a24 + AA +} + +static void ladder_part2(fe xs[5], const fe x1) { + limb_t *x2 = xs[0], *z2 = xs[1], *x3 = xs[2], *z3 = xs[3], *t1 = xs[4]; + sqr1(z3); // z3 = (DA-CB)^2 + mul1(z3, x1); // z3 = x1 * (DA-CB)^2 + sqr1(x3); // x3 = (DA+CB)^2 + mul1(z2, x2); // z2 = AA*(E*a24+AA) + sub(x2, t1, x2); // x2 = BB again + mul1(x2, t1); // x2 = AA*BB +} + +static void x25519_core(fe xs[5], const uint8_t scalar[X25519_BYTES], + const uint8_t *x1, int clamp) { + int i; + limb_t swap = 0; + limb_t *x2 = xs[0], *x3 = xs[2], *z3 = xs[3]; + memset(xs, 0, 4 * sizeof(fe)); + x2[0] = z3[0] = 1; + 
memcpy(x3, x1, sizeof(fe)); + + for (i = 255; i >= 0; i--) { + uint8_t bytei = scalar[i / 8]; + limb_t doswap; + if (clamp) { + if (i / 8 == 0) { + bytei &= (uint8_t) ~7U; + } else if (i / 8 == X25519_BYTES - 1) { + bytei &= 0x7F; + bytei |= 0x40; + } + } + doswap = 0 - (limb_t) ((bytei >> (i % 8)) & 1); + condswap(x2, x3, swap ^ doswap); + swap = doswap; + + ladder_part1(xs); + ladder_part2(xs, (const limb_t *) x1); + } + condswap(x2, x3, swap); +} + +static int x25519(uint8_t out[X25519_BYTES], const uint8_t scalar[X25519_BYTES], + const uint8_t x1[X25519_BYTES], int clamp) { + int i, ret; + fe xs[5]; + limb_t *x2, *z2, *z3, *prev; + static const struct { + uint8_t a, c, n; + } steps[13] = {{2, 1, 1}, {2, 1, 1}, {4, 2, 3}, {2, 4, 6}, {3, 1, 1}, + {3, 2, 12}, {4, 3, 25}, {2, 3, 25}, {2, 4, 50}, {3, 2, 125}, + {3, 1, 2}, {3, 1, 2}, {3, 1, 1}}; + x25519_core(xs, scalar, x1, clamp); + + /* Precomputed inversion chain */ + x2 = xs[0]; + z2 = xs[1]; + z3 = xs[3]; + + prev = z2; + for (i = 0; i < 13; i++) { + int j; + limb_t *a = xs[steps[i].a]; + for (j = steps[i].n; j > 0; j--) { + sqr(a, prev); + prev = a; + } + mul1(a, xs[steps[i].c]); + } + + /* Here prev = z3 */ + /* x2 /= z2 */ + mul((limb_t *) out, x2, z3, NLIMBS); + ret = (int) canon((limb_t *) out); + if (!clamp) ret = 0; + return ret; +} + +/* a help to hexdump buffers inline */ +static void mg_tls_hexdump(const char *msg, uint8_t *buf, size_t bufsz) { + char p[2048]; + MG_INFO(("%s: %s", msg, mg_hex(buf, bufsz, p))); +} + +/* TLS1.3 secret derivation based on the key label */ +static void mg_tls_derive_secret(const char *label, uint8_t *key, size_t keysz, + uint8_t *data, size_t datasz, uint8_t *hash, + size_t hashsz) { + size_t labelsz = strlen(label); + uint8_t secret[32]; + uint8_t packed[256] = {0, (uint8_t) hashsz, (uint8_t) labelsz}; + // TODO: assert lengths of label, key, data and hash + memmove(packed + 3, label, labelsz); + packed[3 + labelsz] = (uint8_t) datasz; + memmove(packed + labelsz + 4, 
data, datasz); + packed[4 + labelsz + datasz] = 1; + + mg_hmac_sha256(secret, key, keysz, packed, 5 + labelsz + datasz); + memmove(hash, secret, hashsz); +} + +/* receive as much data as we can, but at least one full TLS record */ +static int mg_tls_recv_msg(struct mg_connection *c) { + struct tls_data *tls = c->tls; + struct mg_iobuf *rio = &tls->recv; + uint16_t record_len; + // Pull data from TCP + for (;;) { + long n; + mg_iobuf_resize(rio, rio->len + 1); + n = mg_io_recv(c, &rio->buf[rio->len], rio->size - rio->len); + if (n > 0) { + rio->len += (size_t) n; + } else if (n == MG_IO_WAIT) { + break; + } else { + if (!c->is_closing) { + mg_error(c, "read IO err"); + } + return MG_IO_ERR; + } + } + // Look if we've pulled everything + if (rio->len < TLS_HDR_SIZE) return MG_IO_WAIT; + + record_len = MG_LOAD_BE16(rio->buf + 3); + if (rio->len < (size_t) TLS_HDR_SIZE + record_len) return MG_IO_WAIT; + return 0; +} + +// Remove a single TLS record from the recv buffer +static void mg_tls_drop_packet(struct mg_iobuf *rio) { + uint16_t n = MG_LOAD_BE16(rio->buf + 3) + TLS_HDR_SIZE; + mg_iobuf_del(rio, 0, n); + // memmove(rio->buf, rio->buf + n, rio->len - n); + // rio->len = rio->len - n; +} + +/* read and parse ClientHello record */ +static int mg_tls_client_hello(struct mg_connection *c) { + struct tls_data *tls = c->tls; + struct mg_iobuf *rio = &tls->recv; + uint8_t session_id_len; + uint16_t j; + uint16_t cipher_suites_len; + uint16_t ext_len; + uint8_t *ext; + + int r = mg_tls_recv_msg(c); + if (r < 0) { + return r; + } + if (rio->buf[0] != 0x16 || rio->buf[5] != 0x01) { + mg_error(c, "not a hello packet"); + return -1; + } + mg_sha256_update(&tls->sha256, rio->buf + 5, rio->len - 5); + session_id_len = rio->buf[43]; + if (session_id_len == sizeof(tls->session_id)) { + memmove(tls->session_id, rio->buf + 44, session_id_len); + } else if (session_id_len != 0) { + MG_INFO(("bad session id len")); + } + cipher_suites_len = MG_LOAD_BE16(rio->buf + 44 + 
session_id_len); + ext_len = MG_LOAD_BE16(rio->buf + 48 + session_id_len + cipher_suites_len); + ext = rio->buf + 50 + session_id_len + cipher_suites_len; + for (j = 0; j < ext_len;) { + uint16_t k; + uint16_t key_exchange_len; + uint8_t *key_exchange; + uint16_t n = MG_LOAD_BE16(ext + j + 2); + if (ext[j] != 0x00 || + ext[j + 1] != 0x33) { // not a key share extension, ignore + j += (uint16_t) (n + 4); + continue; + } + key_exchange_len = MG_LOAD_BE16(ext + j + 5); + key_exchange = ext + j + 6; + for (k = 0; k < key_exchange_len;) { + uint16_t m = MG_LOAD_BE16(key_exchange + k + 2); + if (m == 32 && key_exchange[k] == 0x00 && key_exchange[k + 1] == 0x1d) { + memmove(tls->x25519_cli, key_exchange + k + 4, m); + mg_tls_drop_packet(rio); + return 0; + } + k += (uint16_t) (m + 4); + } + j += (uint16_t) (n + 4); + } + mg_error(c, "bad client hello"); + return -1; +} + +/* put ServerHello record into wio buffer */ +static void mg_tls_server_hello(struct mg_connection *c) { + struct tls_data *tls = c->tls; + struct mg_iobuf *wio = &tls->send; + + uint8_t msg_server_hello[122] = + // server hello, tls 1.2 + "\x02\x00\x00\x76\x03\x03" + // random (32 bytes) + "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" + "\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe\xfe" + // session ID length + session ID (32 bytes) + "\x20" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" +#if defined(CHACHA20) && CHACHA20 + // TLS_CHACHA20_POLY1305_SHA256 + no compression + "\x13\x03\x00" +#else + // TLS_AES_128_GCM_SHA256 + no compression + "\x13\x01\x00" +#endif + // extensions + keyshare + "\x00\x2e\x00\x33\x00\x24\x00\x1d\x00\x20" + // x25519 keyshare + "\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab" + "\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab\xab" + // supported versions (tls1.3 == 0x304) + "\x00\x2b\x00\x02\x03\x04"; + + // calculate 
keyshare + uint8_t x25519_pub[X25519_BYTES]; + uint8_t x25519_prv[X25519_BYTES]; + mg_random(x25519_prv, sizeof(x25519_prv)); + x25519(x25519_pub, x25519_prv, X25519_BASE_POINT, 1); + x25519(tls->x25519_sec, x25519_prv, tls->x25519_cli, 1); + mg_tls_hexdump("x25519 sec", tls->x25519_sec, sizeof(tls->x25519_sec)); + + // fill in the gaps: session ID + keyshare + memmove(msg_server_hello + 39, tls->session_id, sizeof(tls->session_id)); + memmove(msg_server_hello + 84, x25519_pub, sizeof(x25519_pub)); + + // server hello message + mg_iobuf_add(wio, wio->len, "\x16\x03\x03\x00\x7a", 5); + mg_iobuf_add(wio, wio->len, msg_server_hello, sizeof(msg_server_hello)); + mg_sha256_update(&tls->sha256, msg_server_hello, sizeof(msg_server_hello)); + + // change cipher message + mg_iobuf_add(wio, wio->len, "\x14\x03\x03\x00\x01\x01", 6); +} + +/* at this point we have x25519 shared secret, we can generate a + * set of derived handshake encryption keys */ +static void mg_tls_generate_handshake_keys(struct mg_connection *c) { + struct tls_data *tls = c->tls; + + mg_sha256_ctx sha256; + uint8_t early_secret[32]; + uint8_t pre_extract_secret[32]; + uint8_t hello_hash[32]; + uint8_t server_hs_secret[32]; + uint8_t client_hs_secret[32]; + + mg_hmac_sha256(early_secret, NULL, 0, zeros, sizeof(zeros)); + mg_tls_derive_secret("tls13 derived", early_secret, 32, zeros_sha256_digest, + 32, pre_extract_secret, 32); + mg_hmac_sha256(tls->handshake_secret, pre_extract_secret, + sizeof(pre_extract_secret), tls->x25519_sec, + sizeof(tls->x25519_sec)); + mg_tls_hexdump("hs secret", tls->handshake_secret, 32); + + // mg_sha256_final is not idempotent, need to copy sha256 context to calculate + // the digest + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); + mg_sha256_final(hello_hash, &sha256); + + // derive keys needed for the rest of the handshake + mg_tls_derive_secret("tls13 s hs traffic", tls->handshake_secret, 32, + hello_hash, 32, server_hs_secret, 32); + mg_tls_derive_secret("tls13 
key", server_hs_secret, 32, NULL, 0, + tls->server_write_key, 16); + mg_tls_derive_secret("tls13 iv", server_hs_secret, 32, NULL, 0, + tls->server_write_iv, 12); + mg_tls_derive_secret("tls13 finished", server_hs_secret, 32, NULL, 0, + tls->server_finished_key, 32); + mg_tls_hexdump("s hs traffic", server_hs_secret, 32); + + mg_tls_derive_secret("tls13 c hs traffic", tls->handshake_secret, 32, + hello_hash, 32, client_hs_secret, 32); + mg_tls_derive_secret("tls13 key", client_hs_secret, 32, NULL, 0, + tls->client_write_key, 16); + mg_tls_derive_secret("tls13 iv", client_hs_secret, 32, NULL, 0, + tls->client_write_iv, 12); + mg_tls_derive_secret("tls13 finished", client_hs_secret, 32, NULL, 0, + tls->client_finished_key, 32); +} + +/* AES GCM enctyption of the message + put encoded data into the write buffer */ +static void mg_tls_encrypt(struct mg_connection *c, const uint8_t *msg, + size_t msgsz, uint8_t msgtype) { + struct tls_data *tls = c->tls; + struct mg_iobuf *wio = &tls->send; + uint8_t *outmsg; + uint8_t *tag; + size_t encsz = msgsz + 16 + 1; + uint8_t hdr[5] = {0x17, 0x03, 0x03, (encsz >> 8) & 0xff, encsz & 0xff}; + uint8_t associated_data[5] = {0x17, 0x03, 0x03, (encsz >> 8) & 0xff, + encsz & 0xff}; + uint8_t nonce[12]; + memmove(nonce, tls->server_write_iv, sizeof(tls->server_write_iv)); + nonce[8] ^= (uint8_t) ((tls->sseq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((tls->sseq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((tls->sseq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((tls->sseq) & 255U); + + gcm_initialize(); + mg_iobuf_add(wio, wio->len, hdr, sizeof(hdr)); + mg_iobuf_resize(wio, wio->len + encsz); + outmsg = wio->buf + wio->len; + tag = wio->buf + wio->len + msgsz + 1; + memmove(outmsg, msg, msgsz); + outmsg[msgsz] = msgtype; + aes_gcm_encrypt(outmsg, outmsg, msgsz + 1, tls->server_write_key, + sizeof(tls->server_write_key), nonce, sizeof(nonce), + associated_data, sizeof(associated_data), tag, 16); + wio->len += encsz; + tls->sseq++; +} + +/* read an 
encrypted message, decrypt it into read buffer (AES GCM) */ +static int mg_tls_recv_decrypt(struct mg_connection *c, void *buf, + size_t bufsz) { + struct tls_data *tls = c->tls; + struct mg_iobuf *rio = &tls->recv; + uint16_t msgsz; + uint8_t *msg; + uint8_t nonce[12]; + int r; + for (;;) { + r = mg_tls_recv_msg(c); + if (r < 0) return r; + if (rio->buf[0] == 0x17) { + break; + } else if (rio->buf[0] == 0x15) { + MG_INFO(("TLS ALERT packet received")); /* TODO: drop packet? */ + } else { + mg_error(c, "unexpected packet"); + return -1; + } + } + msgsz = MG_LOAD_BE16(rio->buf + 3); + msg = rio->buf + 5; + memmove(nonce, tls->client_write_iv, sizeof(tls->client_write_iv)); + nonce[8] ^= (uint8_t) ((tls->cseq >> 24) & 255U); + nonce[9] ^= (uint8_t) ((tls->cseq >> 16) & 255U); + nonce[10] ^= (uint8_t) ((tls->cseq >> 8) & 255U); + nonce[11] ^= (uint8_t) ((tls->cseq) & 255U); + aes_gcm_decrypt(msg, msg, msgsz - 16, tls->client_write_key, + sizeof(tls->client_write_key), nonce, sizeof(nonce)); + r = msgsz - 16 - 1; + if (msg[r] == 0x17) { + if (bufsz > 0) { + memmove(buf, msg, msgsz - 16); + } + } else { + r = 0; + } + tls->cseq++; + mg_tls_drop_packet(rio); + return r; +} + +static void mg_tls_server_extensions(struct mg_connection *c) { + struct tls_data *tls = c->tls; + // server extensions + uint8_t ext[6] = {0x08, 0, 0, 2, 0, 0}; + mg_sha256_update(&tls->sha256, ext, sizeof(ext)); + mg_tls_encrypt(c, ext, sizeof(ext), 0x16); +} + +static void mg_tls_server_cert(struct mg_connection *c) { + struct tls_data *tls = c->tls; + // server DER certificate (empty) + size_t n = tls->server_cert_der.len; + uint8_t *cert = calloc(1, 13 + n); // FIXME: free + cert[0] = 0x0b; // handshake header + cert[1] = (uint8_t) (((n + 9) >> 16) & 255U); // 3 bytes: payload length + cert[2] = (uint8_t) (((n + 9) >> 8) & 255U); + cert[3] = (uint8_t) ((n + 9) & 255U); + cert[4] = 0; // request context + cert[5] = (uint8_t) (((n + 5) >> 16) & 255U); // 3 bytes: cert (s) length + cert[6] = 
(uint8_t) (((n + 5) >> 8) & 255U); + cert[7] = (uint8_t) ((n + 5) & 255U); + cert[8] = + (uint8_t) (((n) >> 16) & 255U); // 3 bytes: first (and only) cert len + cert[9] = (uint8_t) (((n) >> 8) & 255U); + cert[10] = (uint8_t) (n & 255U); + // bytes 11+ are certificate in DER format + memmove(cert + 11, tls->server_cert_der.ptr, n); + cert[11 + n] = cert[12 + n] = 0; // certificate extensions (none) + mg_sha256_update(&tls->sha256, cert, 13 + n); + mg_tls_encrypt(c, cert, 13 + n, 0x16); +} + +/* type adapter between uECC hash context and our sha256 implementation */ +typedef struct SHA256_HashContext { + uECC_HashContext uECC; + mg_sha256_ctx ctx; +} SHA256_HashContext; + +static void init_SHA256(const uECC_HashContext *base) { + SHA256_HashContext *c = (SHA256_HashContext *) base; + mg_sha256_init(&c->ctx); +} + +static void update_SHA256(const uECC_HashContext *base, const uint8_t *message, + unsigned message_size) { + SHA256_HashContext *c = (SHA256_HashContext *) base; + mg_sha256_update(&c->ctx, message, message_size); +} +static void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) { + SHA256_HashContext *c = (SHA256_HashContext *) base; + mg_sha256_final(hash_result, &c->ctx); +} + +static void mg_tls_server_verify_ecdsa(struct mg_connection *c) { + struct tls_data *tls = c->tls; + // server certificate verify packet + uint8_t verify[82] = {0x0f, 0x00, 0x00, 0x00, 0x04, 0x03, 0x00, 0x00}; + size_t sigsz, verifysz = 0; + uint8_t hash[32] = {0}, tmp[2 * 32 + 64] = {0}; + struct SHA256_HashContext ctx = { + {&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}, + {{0}, 0, 0, {0}}}; + int neg1, neg2; + uint8_t sig[64], sig_content[130] = { + " " + " " + "TLS 1.3, server CertificateVerify\0"}; + mg_sha256_ctx sha256; + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); + mg_sha256_final(sig_content + 98, &sha256); + + mg_sha256_init(&sha256); + mg_sha256_update(&sha256, sig_content, sizeof(sig_content)); + mg_sha256_final(hash, &sha256); 
+ + uECC_sign_deterministic(tls->server_key, hash, sizeof(hash), &ctx.uECC, sig, + uECC_secp256r1()); + + neg1 = !!(sig[0] & 0x80); + neg2 = !!(sig[32] & 0x80); + verify[8] = 0x30; /* ASN.1 SEQUENCE */ + verify[9] = (uint8_t) (68 + neg1 + neg2); + verify[10] = 0x02; /* ASN.1 INTEGER */ + verify[11] = (uint8_t) (32 + neg1); + memmove(verify + 12 + neg1, sig, 32); + verify[12 + 32 + neg1] = 0x02; /* ASN.1 INTEGER */ + verify[13 + 32 + neg1] = (uint8_t) (32 + neg2); + memmove(verify + 14 + 32 + neg1 + neg2, sig + 32, 32); + + sigsz = (size_t) (70 + neg1 + neg2); + verifysz = 8U + sigsz; + verify[3] = (uint8_t) (sigsz + 4); + verify[7] = (uint8_t) sigsz; + + mg_tls_hexdump("verify", verify, verifysz); + + mg_sha256_update(&tls->sha256, verify, verifysz); + mg_tls_encrypt(c, verify, verifysz, 0x16); +} + +static void mg_tls_server_finish(struct mg_connection *c) { + struct tls_data *tls = c->tls; + struct mg_iobuf *wio = &tls->send; + mg_sha256_ctx sha256; + uint8_t hash[32]; + uint8_t finish[36] = {0x14, 0, 0, 32}; + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); + mg_sha256_final(hash, &sha256); + mg_hmac_sha256(finish + 4, tls->server_finished_key, 32, hash, 32); + mg_tls_hexdump("hash", hash, sizeof(hash)); + mg_tls_hexdump("key", tls->server_finished_key, + sizeof(tls->server_finished_key)); + mg_tls_encrypt(c, finish, sizeof(finish), 0x16); + mg_io_send(c, wio->buf, wio->len); + wio->len = 0; + + mg_sha256_update(&tls->sha256, finish, sizeof(finish)); +} + +static int mg_tls_client_change_cipher(struct mg_connection *c) { + struct tls_data *tls = c->tls; + struct mg_iobuf *rio = &tls->recv; + for (;;) { + int r = mg_tls_recv_msg(c); + if (r < 0) return r; + if (rio->buf[0] == 0x14) { // got a ChangeCipher record + break; + } else if (rio->buf[0] == 0x15) { // skip Alert records + MG_DEBUG(("TLS ALERT packet received")); + mg_tls_drop_packet(rio); + } else { + mg_error(c, "unexpected packet"); + return -1; + } + } + // consume ChangeCipher packet + 
mg_tls_drop_packet(rio); + return 0; +} + +static int mg_tls_client_finish(struct mg_connection *c) { + uint8_t tmp[2048]; + int n = mg_tls_recv_decrypt(c, tmp, sizeof(tmp)); + if (n < 0) { + return -1; + } + // TODO: make sure it's a ClientFinish record + return 0; +} + +static void mg_tls_generate_application_keys(struct mg_connection *c) { + struct tls_data *tls = c->tls; + uint8_t hash[32]; + uint8_t premaster_secret[32]; + uint8_t master_secret[32]; + uint8_t server_secret[32]; + uint8_t client_secret[32]; + + mg_sha256_ctx sha256; + memmove(&sha256, &tls->sha256, sizeof(mg_sha256_ctx)); + mg_sha256_final(hash, &sha256); + + mg_tls_derive_secret("tls13 derived", tls->handshake_secret, 32, + zeros_sha256_digest, 32, premaster_secret, 32); + mg_hmac_sha256(master_secret, premaster_secret, 32, zeros, 32); + + mg_tls_derive_secret("tls13 s ap traffic", master_secret, 32, hash, 32, + server_secret, 32); + mg_tls_derive_secret("tls13 key", server_secret, 32, NULL, 0, + tls->server_write_key, 16); + mg_tls_derive_secret("tls13 iv", server_secret, 32, NULL, 0, + tls->server_write_iv, 12); + mg_tls_derive_secret("tls13 c ap traffic", master_secret, 32, hash, 32, + client_secret, 32); + mg_tls_derive_secret("tls13 key", client_secret, 32, NULL, 0, + tls->client_write_key, 16); + mg_tls_derive_secret("tls13 iv", client_secret, 32, NULL, 0, + tls->client_write_iv, 12); + + tls->sseq = tls->cseq = 0; +} + +void mg_tls_handshake(struct mg_connection *c) { + struct tls_data *tls = c->tls; + switch (tls->state) { + case MG_TLS_HS_CLIENT_HELLO: + if (mg_tls_client_hello(c) < 0) { + return; + } + tls->state = MG_TLS_HS_SERVER_HELLO; + // fallthrough + case MG_TLS_HS_SERVER_HELLO: + mg_tls_server_hello(c); + mg_tls_generate_handshake_keys(c); + mg_tls_server_extensions(c); + mg_tls_server_cert(c); + mg_tls_server_verify_ecdsa(c); + mg_tls_server_finish(c); + tls->state = MG_TLS_HS_CLIENT_CHANGE_CIPHER; + // fallthrough + case MG_TLS_HS_CLIENT_CHANGE_CIPHER: + if 
(mg_tls_client_change_cipher(c) < 0) { + return; + } + tls->state = MG_TLS_HS_CLIENT_FINISH; + // fallthrough + case MG_TLS_HS_CLIENT_FINISH: + if (mg_tls_client_finish(c) < 0) { + return; + } + mg_tls_generate_application_keys(c); + tls->state = MG_TLS_HS_DONE; + // fallthrough + case MG_TLS_HS_DONE: c->is_tls_hs = 0; return; + } +} + +static int mg_parse_pem(const struct mg_str pem, const struct mg_str label, + struct mg_str *der) { + size_t n = 0, m = 0; + char *s; + const char *c; + struct mg_str caps[5]; + if (!mg_match(pem, mg_str("#-----BEGIN #-----#-----END #-----#"), caps)) { + *der = mg_strdup(pem); + return 0; + } + if (mg_strcmp(caps[1], label) != 0 || mg_strcmp(caps[3], label) != 0) { + return -1; // bad label + } + if ((s = calloc(1, caps[2].len)) == NULL) { + return -1; + } + + for (c = caps[2].ptr; c < caps[2].ptr + caps[2].len; c++) { + if (*c == ' ' || *c == '\n' || *c == '\r' || *c == '\t') { + continue; + } + s[n++] = *c; + } + m = mg_base64_decode(s, n, s, n); + if (m == 0) { + free(s); + return -1; + } + der->ptr = s; + der->len = m; + return 0; } void mg_tls_init(struct mg_connection *c, const struct mg_tls_opts *opts) { + struct mg_str key; struct tls_data *tls = (struct tls_data *) calloc(1, sizeof(struct tls_data)); - if (tls != NULL) { - // tls->send.align = tls->recv.align = MG_IO_SIZE; - c->tls = tls; - c->is_tls = c->is_tls_hs = 1; - } else { + if (tls == NULL) { mg_error(c, "tls oom"); + return; } - (void) opts; + /* parse PEM or DER EC key */ + if (opts->key.ptr == NULL || + mg_parse_pem(opts->key, mg_str_s("EC PRIVATE KEY"), &key) < 0) { + MG_ERROR(("Failed to load EC private key")); + return; + } + if (key.len < 39) { + MG_ERROR(("EC private key too short")); + return; + } + /* expect ASN.1 SEQUENCE=[INTEGER=1, BITSTRING of 32 bytes, ...] */ + /* 30 nn 02 01 01 04 20 [key] ... 
*/ + if (key.ptr[0] != 0x30 || (key.ptr[1] & 0x80) != 0) { + MG_ERROR(("EC private key: ASN.1 bad sequence")); + return; + } + if (memcmp(key.ptr + 2, "\x02\x01\x01\x04\x20", 5) != 0) { + MG_ERROR(("EC private key: ASN.1 bad data")); + } + memmove(tls->server_key, key.ptr + 7, 32); + free((void *) key.ptr); + + /* parse PEM or DER certificate */ + if (mg_parse_pem(opts->cert, mg_str_s("CERTIFICATE"), &tls->server_cert_der) < + 0) { + MG_ERROR(("Failed to load certificate")); + return; + } + + tls->send.align = tls->recv.align = MG_IO_SIZE; + c->tls = tls; + c->is_tls = c->is_tls_hs = 1; + mg_sha256_init(&tls->sha256); } + void mg_tls_free(struct mg_connection *c) { struct tls_data *tls = c->tls; if (tls != NULL) { - // mg_iobuf_free(&tls->send); - // mg_iobuf_free(&tls->recv); + mg_iobuf_free(&tls->send); + mg_iobuf_free(&tls->recv); + free((void *) tls->server_cert_der.ptr); } free(c->tls); c->tls = NULL; } + long mg_tls_send(struct mg_connection *c, const void *buf, size_t len) { - (void) c, (void) buf, (void) len; - // MG_INFO(("BBBBBBBB")); - return -1; -} -long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) { - (void) c, (void) buf, (void) len; - char tmp[8192]; - long n = mg_io_recv(c, tmp, sizeof(tmp)); - if (n > 0) mg_hexdump(tmp, (size_t) n); - MG_INFO(("AAAAAAAA")); - return -1; - // struct mg_tls *tls = (struct mg_tls *) c->tls; - // long n = mbedtls_ssl_read(&tls->ssl, (unsigned char *) buf, len); - // if (n == MBEDTLS_ERR_SSL_WANT_READ || n == MBEDTLS_ERR_SSL_WANT_WRITE) - // return MG_IO_WAIT; - // if (n <= 0) return MG_IO_ERR; - // return n; -} -size_t mg_tls_pending(struct mg_connection *c) { - (void) c; - return 0; -} -void mg_tls_handshake(struct mg_connection *c) { - // struct tls_data *tls = c->tls; - struct mg_iobuf *rio = &c->raw; - struct mg_iobuf *wio = &c->send; - - // Look if we've pulled everything - if (rio->len < TLS_HDR_SIZE) return; - uint8_t record_type = rio->buf[0]; - uint16_t record_len = MG_LOAD_BE16(rio->buf + 3); - 
uint16_t record_version = MG_LOAD_BE16(rio->buf + 1); - if (record_type != 22) { - mg_error(c, "not a handshake"); - return; + struct tls_data *tls = c->tls; + long n = MG_IO_WAIT; + if (len > 2048) len = 2048; /* at most 2048 plaintext bytes are handled per call; the capped length is what gets reported back */ + mg_tls_encrypt(c, buf, len, 0x17); /* 0x17 (23) is passed as the record type. NOTE(review): presumably this appends the encrypted record to tls->send; if so, the MG_IO_WAIT / MG_IO_ERR return at the end of this function leaves that record queued, and a caller that retries with the same buf would queue it a second time - confirm against mg_tls_encrypt and the callers */ + while (tls->send.len > 0 && + (n = mg_io_send(c, tls->send.buf, tls->send.len)) > 0) { /* drop what was written and keep flushing until tls->send is empty or the send call returns a value <= 0 */ + mg_iobuf_del(&tls->send, 0, (size_t) n); } - if (rio->len < (size_t) TLS_HDR_SIZE + record_len) return; - // Got full hello - // struct tls_hello *hello = (struct tls_hello *) (hdr + 1); - MG_INFO(("CT=%d V=%hx L=%hu", record_type, record_version, record_len)); - // mg_hexdump(rio->buf, rio->len); - - // Send response. Server Hello - size_t ofs = wio->len; - add8(wio, 22), add16(wio, 0x303), add16(wio, 0); // Layer: type, ver, len - add8(wio, 2), add8(wio, 0), add16(wio, 0), add16(wio, 0x304); // Hello - mg_iobuf_add(wio, wio->len, NULL, 32); // 32 random - mg_random(wio->buf + wio->len - 32, 32); // bytes - add8(wio, 0); // Session ID - add16(wio, 0x1301); // Cipher: TLS_AES_128_GCM_SHA256 - add8(wio, 0); // Compression method: 0 - add16(wio, 46); // Extensions length - add16(wio, 43), add16(wio, 2), add16(wio, 0x304); // extension: TLS 1.3 - - // Key share: use curve x25519 (id 29) - add16(wio, 51), add16(wio, 36), add16(wio, 29), add16(wio, 32); // keyshare - mg_iobuf_add(wio, wio->len, NULL, 32); // 32 random - mg_random(wio->buf + wio->len - 32, 32); // bytes - *(uint16_t *) &wio->buf[ofs + 3] = mg_be16((uint16_t) (wio->len - ofs - 5)); - *(uint16_t *) &wio->buf[ofs + 7] = mg_be16((uint16_t) (wio->len - ofs - 9)); - - // Change cipher. 
Cipher's payload is an encypted app data - // ofs = wio->len; - add8(wio, 20), add16(wio, 0x303); // Layer: type, version - add16(wio, 1), add8(wio, 1); - - ofs = wio->len; // Application data - add8(wio, 23), add16(wio, 0x303), add16(wio, 5); // Layer: type, version - // mg_iobuf_add(wio, wio->len, "\x01\x02\x03\x04\x05", 5); - add8(wio, 22); // handshake message - add8(wio, 8); // encrypted extensions - add8(wio, 0), add16(wio, 2), add16(wio, 0); // empty 2 bytes - add8(wio, 11); // certificate message - add8(wio, 0), add16(wio, 4), add32(wio, 0x1020304); // len - *(uint16_t *) &wio->buf[ofs + 3] = mg_be16((uint16_t) (wio->len - ofs - 5)); - - mg_io_send(c, wio->buf, wio->len); - wio->len = 0; - - rio->len = 0; - c->is_tls_hs = 0; - mg_error(c, "doh"); + if (n == MG_IO_ERR || n == MG_IO_WAIT) return n; + return (long) len; } -void mg_tls_ctx_free(struct mg_mgr *mgr) { - free(mgr->tls_ctx); - mgr->tls_ctx = NULL; + +long mg_tls_recv(struct mg_connection *c, void *buf, size_t len) { + return mg_tls_recv_decrypt(c, buf, len); } + +size_t mg_tls_pending(struct mg_connection *c) { + struct tls_data *tls = (struct tls_data *) c->tls; + return tls == NULL ? 0 : tls->recv.len; +} + void mg_tls_ctx_init(struct mg_mgr *mgr) { (void) mgr; } + +void mg_tls_ctx_free(struct mg_mgr *mgr) { + (void) mgr; +} #endif diff --git a/src/tls_uecc.c b/src/tls_uecc.c new file mode 100644 index 00000000..2dff7630 --- /dev/null +++ b/src/tls_uecc.c @@ -0,0 +1,3173 @@ +/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#include "tls_uecc.h" +#include "tls.h" + +#if MG_TLS == MG_TLS_BUILTIN + +#ifndef uECC_RNG_MAX_TRIES +#define uECC_RNG_MAX_TRIES 64 +#endif + +#if uECC_ENABLE_VLI_API +#define uECC_VLI_API +#else +#define uECC_VLI_API static +#endif + +#if (uECC_PLATFORM == uECC_avr) || (uECC_PLATFORM == uECC_arm) || \ + (uECC_PLATFORM == uECC_arm_thumb) || (uECC_PLATFORM == uECC_arm_thumb2) +#define CONCATX(a, ...) a##__VA_ARGS__ +#define CONCAT(a, ...) 
CONCATX(a, __VA_ARGS__) + +#define STRX(a) #a +#define STR(a) STRX(a) + +#define EVAL(...) EVAL1(EVAL1(EVAL1(EVAL1(__VA_ARGS__)))) +#define EVAL1(...) EVAL2(EVAL2(EVAL2(EVAL2(__VA_ARGS__)))) +#define EVAL2(...) EVAL3(EVAL3(EVAL3(EVAL3(__VA_ARGS__)))) +#define EVAL3(...) EVAL4(EVAL4(EVAL4(EVAL4(__VA_ARGS__)))) +#define EVAL4(...) __VA_ARGS__ + +#define DEC_1 0 +#define DEC_2 1 +#define DEC_3 2 +#define DEC_4 3 +#define DEC_5 4 +#define DEC_6 5 +#define DEC_7 6 +#define DEC_8 7 +#define DEC_9 8 +#define DEC_10 9 +#define DEC_11 10 +#define DEC_12 11 +#define DEC_13 12 +#define DEC_14 13 +#define DEC_15 14 +#define DEC_16 15 +#define DEC_17 16 +#define DEC_18 17 +#define DEC_19 18 +#define DEC_20 19 +#define DEC_21 20 +#define DEC_22 21 +#define DEC_23 22 +#define DEC_24 23 +#define DEC_25 24 +#define DEC_26 25 +#define DEC_27 26 +#define DEC_28 27 +#define DEC_29 28 +#define DEC_30 29 +#define DEC_31 30 +#define DEC_32 31 + +#define DEC(N) CONCAT(DEC_, N) + +#define SECOND_ARG(_, val, ...) val +#define SOME_CHECK_0 ~, 0 +#define GET_SECOND_ARG(...) SECOND_ARG(__VA_ARGS__, SOME, ) +#define SOME_OR_0(N) GET_SECOND_ARG(CONCAT(SOME_CHECK_, N)) + +#define EMPTY(...) +#define DEFER(...) __VA_ARGS__ EMPTY() + +#define REPEAT_NAME_0() REPEAT_0 +#define REPEAT_NAME_SOME() REPEAT_SOME +#define REPEAT_0(...) +#define REPEAT_SOME(N, stuff) \ + DEFER(CONCAT(REPEAT_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), stuff) stuff +#define REPEAT(N, stuff) EVAL(REPEAT_SOME(N, stuff)) + +#define REPEATM_NAME_0() REPEATM_0 +#define REPEATM_NAME_SOME() REPEATM_SOME +#define REPEATM_0(...) +#define REPEATM_SOME(N, macro) \ + macro(N) DEFER(CONCAT(REPEATM_NAME_, SOME_OR_0(DEC(N))))()(DEC(N), macro) +#define REPEATM(N, macro) EVAL(REPEATM_SOME(N, macro)) +#endif + +//#include "platform-specific.inc" + +#if (uECC_WORD_SIZE == 1) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 21 /* Due to the size of curve_n. 
*/ +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 24 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 28 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 32 +#endif +#elif (uECC_WORD_SIZE == 4) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 6 /* Due to the size of curve_n. */ +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 6 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 7 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 8 +#endif +#elif (uECC_WORD_SIZE == 8) +#if uECC_SUPPORTS_secp160r1 +#define uECC_MAX_WORDS 3 +#endif +#if uECC_SUPPORTS_secp192r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 3 +#endif +#if uECC_SUPPORTS_secp224r1 +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 4 +#endif +#if (uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1) +#undef uECC_MAX_WORDS +#define uECC_MAX_WORDS 4 +#endif +#endif /* uECC_WORD_SIZE */ + +#define BITS_TO_WORDS(num_bits) \ + ((wordcount_t) ((num_bits + ((uECC_WORD_SIZE * 8) - 1)) / \ + (uECC_WORD_SIZE * 8))) +#define BITS_TO_BYTES(num_bits) ((num_bits + 7) / 8) + +struct uECC_Curve_t { + wordcount_t num_words; + wordcount_t num_bytes; + bitcount_t num_n_bits; + uECC_word_t p[uECC_MAX_WORDS]; + uECC_word_t n[uECC_MAX_WORDS]; + uECC_word_t G[uECC_MAX_WORDS * 2]; + uECC_word_t b[uECC_MAX_WORDS]; + void (*double_jacobian)(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *Z1, + uECC_Curve curve); +#if uECC_SUPPORT_COMPRESSED_POINT + void (*mod_sqrt)(uECC_word_t *a, uECC_Curve curve); +#endif + void (*x_side)(uECC_word_t *result, const uECC_word_t *x, uECC_Curve curve); +#if (uECC_OPTIMIZATION_LEVEL > 0) + void (*mmod_fast)(uECC_word_t *result, uECC_word_t *product); +#endif +}; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN +static void bcopy(uint8_t *dst, const 
uint8_t *src, unsigned num_bytes) { /* copies num_bytes bytes from src to dst, highest index first */ + while (0 != num_bytes) { + num_bytes--; + dst[num_bytes] = src[num_bytes]; + } +} +#endif + +static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words); /* forward declaration; the definition follows further down in this file */ + +#if (uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ + uECC_PLATFORM == uECC_arm_thumb2) +#include "asm_arm.inc" +#endif + +#if (uECC_PLATFORM == uECC_avr) +#include "asm_avr.inc" +#endif + +#ifndef asm_clear +#define asm_clear 0 +#endif +#ifndef asm_set +#define asm_set 0 +#endif +#ifndef asm_add +#define asm_add 0 +#endif +#ifndef asm_sub +#define asm_sub 0 +#endif +#ifndef asm_mult +#define asm_mult 0 +#endif +#ifndef asm_rshift1 +#define asm_rshift1 0 +#endif +#ifndef asm_mmod_fast_secp256r1 +#define asm_mmod_fast_secp256r1 0 +#endif /* NOTE(review): asm_square and asm_mmod_fast_secp160r1 are tested in #if conditions later in this file but get no 0 default here; an undefined name evaluates to 0 inside #if, so this only matters for -Wundef builds, or if the asm .inc files are expected to define them - confirm */ + +#if defined(default_RNG_defined) && default_RNG_defined +static uECC_RNG_Function g_rng_function = &default_RNG; +#else +static uECC_RNG_Function g_rng_function = 0; +#endif + +void uECC_set_rng(uECC_RNG_Function rng_function) { /* replaces the callback stored in the file-scope g_rng_function */ + g_rng_function = rng_function; +} + +uECC_RNG_Function uECC_get_rng(void) { /* returns the callback currently stored in g_rng_function (0 when none was set and no default exists) */ + return g_rng_function; +} + +int uECC_curve_private_key_size(uECC_Curve curve) { /* the curve's num_n_bits rounded up to whole bytes */ + return BITS_TO_BYTES(curve->num_n_bits); +} + +int uECC_curve_public_key_size(uECC_Curve curve) { /* twice the curve's num_bytes */ + return 2 * curve->num_bytes; +} + +#if !asm_clear +uECC_VLI_API void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words) { /* sets the first num_words words of vli to 0 */ + wordcount_t i; + for (i = 0; i < num_words; ++i) { + vli[i] = 0; + } +} +#endif /* !asm_clear */ + +/* Constant-time comparison to zero - secure way to compare long integers: every word is OR-ed into one accumulator, with no early exit */ +/* Returns 1 if vli == 0, 0 otherwise. */ +uECC_VLI_API uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, + wordcount_t num_words) { + uECC_word_t bits = 0; + wordcount_t i; + for (i = 0; i < num_words; ++i) { + bits |= vli[i]; + } + return (bits == 0); +} + +/* Returns nonzero if bit 'bit' of vli is set. 
*/ +uECC_VLI_API uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, + bitcount_t bit) { /* the result is the masked word itself, not normalised to 0 or 1 */ + return (vli[bit >> uECC_WORD_BITS_SHIFT] & + ((uECC_word_t) 1 << (bit & uECC_WORD_BITS_MASK))); +} + +/* Counts the number of words in vli, not counting zero words at the most-significant end (0 if every word is zero). */ +static wordcount_t vli_numDigits(const uECC_word_t *vli, + const wordcount_t max_words) { + wordcount_t i; + /* Search from the end until we find a non-zero digit. + We do it in reverse because we expect that most digits will be nonzero. NOTE(review): the i >= 0 test only ends the scan of an all-zero value if wordcount_t is a signed type; the typedef is outside this chunk - confirm. */ + for (i = max_words - 1; i >= 0 && vli[i] == 0; --i) { + } + + return (i + 1); +} + +/* Counts the number of bits required to represent vli (0 if vli is zero): whole words below the top non-zero word, plus the bit length of that top word. */ +uECC_VLI_API bitcount_t uECC_vli_numBits(const uECC_word_t *vli, + const wordcount_t max_words) { + uECC_word_t i; + uECC_word_t digit; + + wordcount_t num_digits = vli_numDigits(vli, max_words); + if (num_digits == 0) { + return 0; + } + + digit = vli[num_digits - 1]; + for (i = 0; digit; ++i) { + digit >>= 1; + } + + return (((bitcount_t) ((num_digits - 1) << uECC_WORD_BITS_SHIFT)) + + (bitcount_t) i); +} + +/* Sets dest = src (copies num_words words, lowest index first). */ +#if !asm_set +uECC_VLI_API void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, + wordcount_t num_words) { + wordcount_t i; + for (i = 0; i < num_words; ++i) { + dest[i] = src[i]; + } +} +#endif /* !asm_set */ + +/* Returns sign of left - right: 1, -1 or 0. Not constant-time: the scan starts at the highest-index word and returns at the first word that differs. */ +static cmpresult_t uECC_vli_cmp_unsafe(const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + wordcount_t i; + for (i = num_words - 1; i >= 0; --i) { + if (left[i] > right[i]) { + return 1; + } else if (left[i] < right[i]) { + return -1; + } + } + return 0; +} + +/* Constant-time comparison function - secure way to compare long integers */ +/* Returns one if left == right, zero otherwise. 
*/ +uECC_VLI_API uECC_word_t uECC_vli_equal(const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t diff = 0; + wordcount_t i; + for (i = num_words - 1; i >= 0; --i) { /* every word pair is XOR-ed into diff; there is no early exit */ + diff |= (left[i] ^ right[i]); + } + return (diff == 0); +} + +uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words); /* forward declaration: uECC_vli_cmp below calls it before its definition */ + +/* Returns sign of left - right, in constant time. The difference goes to a scratch buffer; neg is 1 when the subtraction borrowed, so !equal - 2 * neg is 0 when equal, 1 when left > right and -1 when left < right. */ +uECC_VLI_API cmpresult_t uECC_vli_cmp(const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t tmp[uECC_MAX_WORDS]; + uECC_word_t neg = !!uECC_vli_sub(tmp, left, right, num_words); + uECC_word_t equal = uECC_vli_isZero(tmp, num_words); + return (cmpresult_t) (!equal - 2 * neg); +} + +/* Computes vli = vli >> 1, walking from the highest-index word down and moving the low bit of each word into the top bit of the word below it. */ +#if !asm_rshift1 +uECC_VLI_API void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words) { + uECC_word_t *end = vli; + uECC_word_t carry = 0; + + vli += num_words; + while (vli-- > end) { + uECC_word_t temp = *vli; + *vli = (temp >> 1) | carry; + carry = temp << (uECC_WORD_BITS - 1); + } +} +#endif /* !asm_rshift1 */ + +/* Computes result = left + right, returning carry. Can modify in place. */ +#if !asm_add +uECC_VLI_API uECC_word_t uECC_vli_add(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t carry = 0; + wordcount_t i; + for (i = 0; i < num_words; ++i) { + uECC_word_t sum = left[i] + right[i] + carry; + if (sum != left[i]) { /* when sum == left[i], right[i] + carry was 0 modulo the word size and the incoming carry is passed on unchanged */ + carry = (sum < left[i]); + } + result[i] = sum; + } + return carry; +} +#endif /* !asm_add */ + +/* Computes result = left - right, returning borrow. Can modify in place. 
*/ +#if !asm_sub +uECC_VLI_API uECC_word_t uECC_vli_sub(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { + uECC_word_t borrow = 0; + wordcount_t i; + for (i = 0; i < num_words; ++i) { + uECC_word_t diff = left[i] - right[i] - borrow; + if (diff != left[i]) { /* when diff == left[i], right[i] + borrow was 0 modulo the word size and the incoming borrow is passed on unchanged */ + borrow = (diff > left[i]); + } + result[i] = diff; + } + return borrow; +} +#endif /* !asm_sub */ + +#if !asm_mult || (uECC_SQUARE_FUNC && !asm_square) || \ + (uECC_SUPPORTS_secp256k1 && (uECC_OPTIMIZATION_LEVEL > 0) && \ + ((uECC_WORD_SIZE == 1) || (uECC_WORD_SIZE == 8))) +static void muladd(uECC_word_t a, uECC_word_t b, uECC_word_t *r0, + uECC_word_t *r1, uECC_word_t *r2) { /* Adds the double-word product a * b to the three-word accumulator (r2, r1, r0), r0 being the least-significant word. */ +#if uECC_WORD_SIZE == 8 + uint64_t a0 = a & 0xffffffff; + uint64_t a1 = a >> 32; + uint64_t b0 = b & 0xffffffff; + uint64_t b1 = b >> 32; + + uint64_t i0 = a0 * b0; + uint64_t i1 = a0 * b1; + uint64_t i2 = a1 * b0; + uint64_t i3 = a1 * b1; + + uint64_t p0, p1; + + i2 += (i0 >> 32); + i2 += i1; + if (i2 < i1) { /* overflow: i2 wrapped, so 2^32 is carried into the high partial product i3 */ + i3 += 0x100000000; + } + + p0 = (i0 & 0xffffffff) | (i2 << 32); + p1 = i3 + (i2 >> 32); + + *r0 += p0; + *r1 += (p1 + (*r0 < p0)); + *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0)); +#else + uECC_dword_t p = (uECC_dword_t) a * b; + uECC_dword_t r01 = ((uECC_dword_t) (*r1) << uECC_WORD_BITS) | *r0; + r01 += p; + *r2 += (r01 < p); + *r1 = (uECC_word_t) (r01 >> uECC_WORD_BITS); + *r0 = (uECC_word_t) r01; +#endif +} +#endif /* muladd needed */ + +#if !asm_mult +uECC_VLI_API void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + wordcount_t num_words) { /* Computes result = left * right; words 0 .. 2 * num_words - 1 of result are written. */ + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t i, k; + + /* Compute each digit of result in sequence, maintaining the carries. 
*/ + for (k = 0; k < num_words; ++k) { /* lower half: result word k sums left[i] * right[k - i] for i = 0 .. k */ + for (i = 0; i <= k; ++i) { + muladd(left[i], right[k - i], &r0, &r1, &r2); + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + for (k = num_words; k < num_words * 2 - 1; ++k) { /* upper half: i starts at k + 1 - num_words so that k - i stays below num_words */ + for (i = (wordcount_t) ((k + 1) - num_words); i < num_words; ++i) { + muladd(left[i], right[k - i], &r0, &r1, &r2); + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + result[num_words * 2 - 1] = r0; +} +#endif /* !asm_mult */ + +#if uECC_SQUARE_FUNC + +#if !asm_square +static void mul2add(uECC_word_t a, uECC_word_t b, uECC_word_t *r0, + uECC_word_t *r1, uECC_word_t *r2) { /* Adds 2 * a * b to the three-word accumulator (r2, r1, r0); the bit shifted out by the doubling is added to r2. */ +#if uECC_WORD_SIZE == 8 + uint64_t a0 = a & 0xffffffffull; + uint64_t a1 = a >> 32; + uint64_t b0 = b & 0xffffffffull; + uint64_t b1 = b >> 32; + + uint64_t i0 = a0 * b0; + uint64_t i1 = a0 * b1; + uint64_t i2 = a1 * b0; + uint64_t i3 = a1 * b1; + + uint64_t p0, p1; + + i2 += (i0 >> 32); + i2 += i1; + if (i2 < i1) { /* overflow: i2 wrapped, so 2^32 is carried into the high partial product i3 */ + i3 += 0x100000000ull; + } + + p0 = (i0 & 0xffffffffull) | (i2 << 32); + p1 = i3 + (i2 >> 32); + + *r2 += (p1 >> 63); + p1 = (p1 << 1) | (p0 >> 63); + p0 <<= 1; + + *r0 += p0; + *r1 += (p1 + (*r0 < p0)); + *r2 += ((*r1 < p1) || (*r1 == p1 && *r0 < p0)); +#else + uECC_dword_t p = (uECC_dword_t) a * b; + uECC_dword_t r01 = ((uECC_dword_t) (*r1) << uECC_WORD_BITS) | *r0; + *r2 += (p >> (uECC_WORD_BITS * 2 - 1)); + p *= 2; + r01 += p; + *r2 += (r01 < p); + *r1 = r01 >> uECC_WORD_BITS; + *r0 = (uECC_word_t) r01; +#endif +} + +uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words) { /* Computes result = left * left; words 0 .. 2 * num_words - 1 of result are written. */ + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + + wordcount_t i, k; + + for (k = 0; k < num_words * 2 - 1; ++k) { + uECC_word_t min = (k < num_words ? 
0 : (k + 1) - num_words); + for (i = min; i <= k && i <= k - i; ++i) { + if (i < k - i) { /* a term with i != k - i occurs twice in the square, so it is added doubled; the middle term i == k - i is added once */ + mul2add(left[i], left[k - i], &r0, &r1, &r2); + } else { + muladd(left[i], left[k - i], &r0, &r1, &r2); + } + } + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + + result[num_words * 2 - 1] = r0; +} +#endif /* !asm_square */ + +#else /* uECC_SQUARE_FUNC */ + +#if uECC_ENABLE_VLI_API +uECC_VLI_API void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words) { /* without a dedicated square routine, squaring is a plain multiplication of left by itself */ + uECC_vli_mult(result, left, left, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +#endif /* uECC_SQUARE_FUNC */ + +/* Computes result = (left + right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +uECC_VLI_API void uECC_vli_modAdd(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t carry = uECC_vli_add(result, left, right, num_words); + if (carry || uECC_vli_cmp_unsafe(mod, result, num_words) != 1) { + /* result >= mod (the addition carried out, or mod is not greater than result), so subtract mod to get + * the remainder. */ + uECC_vli_sub(result, result, mod, num_words); + } +} + +/* Computes result = (left - right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +uECC_VLI_API void uECC_vli_modSub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t l_borrow = uECC_vli_sub(result, left, right, num_words); + if (l_borrow) { + /* In this case, result == -diff == (max int) - diff. Since -x % d == d - x, + we can get the correct result from result + mod (with overflow). */ + uECC_vli_add(result, result, mod, num_words); + } +} + +/* Computes result = product % mod, where product is 2N words long. */ +/* Currently only designed to work for curve_p or curve_n. 
*/ +uECC_VLI_API void uECC_vli_mmod(uECC_word_t *result, uECC_word_t *product, + const uECC_word_t *mod, wordcount_t num_words) { /* NOTE: product doubles as one of the two working buffers (v[1]) and is overwritten. */ + uECC_word_t mod_multiple[2 * uECC_MAX_WORDS]; + uECC_word_t tmp[2 * uECC_MAX_WORDS]; + uECC_word_t *v[2] = {tmp, product}; + uECC_word_t index; + + /* Shift mod so its highest set bit is at the maximum position of a 2N-word value; shift is the number of bit positions it is moved up. */ + bitcount_t shift = (bitcount_t) ( + (num_words * 2 * uECC_WORD_BITS) - uECC_vli_numBits(mod, num_words)); + wordcount_t word_shift = (wordcount_t) (shift / uECC_WORD_BITS); + wordcount_t bit_shift = (wordcount_t) (shift % uECC_WORD_BITS); + uECC_word_t carry = 0; + uECC_vli_clear(mod_multiple, word_shift); + if (bit_shift > 0) { + for (index = 0; index < (uECC_word_t) num_words; ++index) { + mod_multiple[(uECC_word_t) word_shift + index] = + (uECC_word_t) (mod[index] << bit_shift) | carry; + carry = mod[index] >> (uECC_WORD_BITS - bit_shift); + } + } else { + uECC_vli_set(mod_multiple + word_shift, mod, num_words); + } + + for (index = 1; shift >= 0; --shift) { /* one trial subtraction per bit position, from the fully shifted multiple back down to mod itself */ + uECC_word_t borrow = 0; + wordcount_t i; + for (i = 0; i < num_words * 2; ++i) { + uECC_word_t diff = v[index][i] - mod_multiple[i] - borrow; + if (diff != v[index][i]) { + borrow = (diff > v[index][i]); + } + v[1 - index][i] = diff; + } + index = !(index ^ borrow); /* Swap the index if there was no borrow, so v[index] then names the buffer holding the difference; with a borrow the old value stays current */ + uECC_vli_rshift1(mod_multiple, num_words); + mod_multiple[num_words - 1] |= mod_multiple[num_words] + << (uECC_WORD_BITS - 1); + uECC_vli_rshift1(mod_multiple + num_words, num_words); /* the three statements above shift all 2N words of mod_multiple right by one bit, as two N-word shifts with the bit crossing the halves moved by hand */ + } + uECC_vli_set(result, v[index], num_words); +} + +/* Computes result = (left * right) % mod. 
*/ +uECC_VLI_API void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_mult(product, left, right, num_words); + uECC_vli_mmod(result, product, mod, num_words); +} + +uECC_VLI_API void uECC_vli_modMult_fast(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *right, + uECC_Curve curve) { /* Multiplies left by right and reduces the product: through the curve's mmod_fast callback when uECC_OPTIMIZATION_LEVEL > 0, otherwise through uECC_vli_mmod with curve->p. */ + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_mult(product, left, right, curve->num_words); +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} + +#if uECC_SQUARE_FUNC + +#if uECC_ENABLE_VLI_API +/* Computes result = left^2 % mod. */ +uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_square(product, left, num_words); + uECC_vli_mmod(result, product, mod, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, + const uECC_word_t *left, + uECC_Curve curve) { /* Squares left with uECC_vli_square and reduces the product the same way uECC_vli_modMult_fast does. */ + uECC_word_t product[2 * uECC_MAX_WORDS]; + uECC_vli_square(product, left, curve->num_words); +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} + +#else /* uECC_SQUARE_FUNC */ + +#if uECC_ENABLE_VLI_API +uECC_VLI_API void uECC_vli_modSquare(uECC_word_t *result, + const uECC_word_t *left, + const uECC_word_t *mod, + wordcount_t num_words) { /* without uECC_SQUARE_FUNC, squaring is a modular multiplication of left by itself */ + uECC_vli_modMult(result, left, left, mod, num_words); +} +#endif /* uECC_ENABLE_VLI_API */ + +uECC_VLI_API void uECC_vli_modSquare_fast(uECC_word_t *result, + const uECC_word_t *left, + uECC_Curve curve) { /* without uECC_SQUARE_FUNC, squaring is uECC_vli_modMult_fast of left by itself */ + uECC_vli_modMult_fast(result, left, left, curve); +} + +#endif /* uECC_SQUARE_FUNC */ + +#define EVEN(vli) (!(vli[0] & 
1)) +static void vli_modInv_update(uECC_word_t *uv, const uECC_word_t *mod, + wordcount_t num_words) { /* If uv is odd, mod is added first; uv is then shifted right by one bit, and a carry out of the addition is put back through HIGH_BIT_SET (defined outside this chunk, presumably the top-bit mask). */ + uECC_word_t carry = 0; + if (!EVEN(uv)) { + carry = uECC_vli_add(uv, uv, mod, num_words); + } + uECC_vli_rshift1(uv, num_words); + if (carry) { + uv[num_words - 1] |= HIGH_BIT_SET; + } +} + +/* Computes result = (1 / input) % mod (result is cleared to 0 when input is 0). All VLIs are the same size. + See "From Euclid's GCD to Montgomery Multiplication to the Great Divide" */ +uECC_VLI_API void uECC_vli_modInv(uECC_word_t *result, const uECC_word_t *input, + const uECC_word_t *mod, + wordcount_t num_words) { + uECC_word_t a[uECC_MAX_WORDS], b[uECC_MAX_WORDS], u[uECC_MAX_WORDS], + v[uECC_MAX_WORDS]; + cmpresult_t cmpResult; + + if (uECC_vli_isZero(input, num_words)) { + uECC_vli_clear(result, num_words); + return; + } + + uECC_vli_set(a, input, num_words); + uECC_vli_set(b, mod, num_words); + uECC_vli_clear(u, num_words); + u[0] = 1; + uECC_vli_clear(v, num_words); /* working state: a = input, b = mod, u = 1, v = 0 */ + while ((cmpResult = uECC_vli_cmp_unsafe(a, b, num_words)) != 0) { /* runs until a == b; each pass halves a or b, or replaces the larger one by half of their difference, and applies the matching update to u or v */ + if (EVEN(a)) { + uECC_vli_rshift1(a, num_words); + vli_modInv_update(u, mod, num_words); + } else if (EVEN(b)) { + uECC_vli_rshift1(b, num_words); + vli_modInv_update(v, mod, num_words); + } else if (cmpResult > 0) { + uECC_vli_sub(a, a, b, num_words); + uECC_vli_rshift1(a, num_words); + if (uECC_vli_cmp_unsafe(u, v, num_words) < 0) { /* add mod first so that the subtraction u - v below does not go negative */ + uECC_vli_add(u, u, mod, num_words); + } + uECC_vli_sub(u, u, v, num_words); + vli_modInv_update(u, mod, num_words); + } else { + uECC_vli_sub(b, b, a, num_words); + uECC_vli_rshift1(b, num_words); + if (uECC_vli_cmp_unsafe(v, u, num_words) < 0) { /* add mod first so that the subtraction v - u below does not go negative */ + uECC_vli_add(v, v, mod, num_words); + } + uECC_vli_sub(v, v, u, num_words); + vli_modInv_update(v, mod, num_words); + } + } + uECC_vli_set(result, u, num_words); +} + +/* ------ Point operations ------ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. 
*/ + +#ifndef _UECC_CURVE_SPECIFIC_H_ +#define _UECC_CURVE_SPECIFIC_H_ + +#define num_bytes_secp160r1 20 +#define num_bytes_secp192r1 24 +#define num_bytes_secp224r1 28 +#define num_bytes_secp256r1 32 +#define num_bytes_secp256k1 32 + +#if (uECC_WORD_SIZE == 1) + +#define num_words_secp160r1 20 +#define num_words_secp192r1 24 +#define num_words_secp224r1 28 +#define num_words_secp256r1 32 +#define num_words_secp256k1 32 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) \ + 0x##a, 0x##b, 0x##c, 0x##d, 0x##e, 0x##f, 0x##g, 0x##h +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##a, 0x##b, 0x##c, 0x##d + +#elif (uECC_WORD_SIZE == 4) + +#define num_words_secp160r1 5 +#define num_words_secp192r1 6 +#define num_words_secp224r1 7 +#define num_words_secp256r1 8 +#define num_words_secp256k1 8 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##d##c##b##a, 0x##h##g##f##e +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a + +#elif (uECC_WORD_SIZE == 8) + +#define num_words_secp160r1 3 +#define num_words_secp192r1 3 +#define num_words_secp224r1 4 +#define num_words_secp256r1 4 +#define num_words_secp256k1 4 + +#define BYTES_TO_WORDS_8(a, b, c, d, e, f, g, h) 0x##h##g##f##e##d##c##b##a##U +#define BYTES_TO_WORDS_4(a, b, c, d) 0x##d##c##b##a##U + +#endif /* uECC_WORD_SIZE */ + +#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \ + uECC_SUPPORTS_secp224r1 || uECC_SUPPORTS_secp256r1 +static void double_jacobian_default(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve) { + /* t1 = X, t2 = Y, t3 = Z */ + uECC_word_t t4[uECC_MAX_WORDS]; + uECC_word_t t5[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + if (uECC_vli_isZero(Z1, num_words)) { + return; + } + + uECC_vli_modSquare_fast(t4, Y1, curve); /* t4 = y1^2 */ + uECC_vli_modMult_fast(t5, X1, t4, curve); /* t5 = x1*y1^2 = A */ + uECC_vli_modSquare_fast(t4, t4, curve); /* t4 = y1^4 */ + uECC_vli_modMult_fast(Y1, Y1, Z1, curve); /* t2 = y1*z1 = z3 */ + uECC_vli_modSquare_fast(Z1, Z1, curve); 
/* t3 = z1^2 */ + + uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = x1 + z1^2 */ + uECC_vli_modAdd(Z1, Z1, Z1, curve->p, num_words); /* t3 = 2*z1^2 */ + uECC_vli_modSub(Z1, X1, Z1, curve->p, num_words); /* t3 = x1 - z1^2 */ + uECC_vli_modMult_fast(X1, X1, Z1, curve); /* t1 = x1^2 - z1^4 */ + + uECC_vli_modAdd(Z1, X1, X1, curve->p, num_words); /* t3 = 2*(x1^2 - z1^4) */ + uECC_vli_modAdd(X1, X1, Z1, curve->p, num_words); /* t1 = 3*(x1^2 - z1^4) */ + if (uECC_vli_testBit(X1, 0)) { + uECC_word_t l_carry = uECC_vli_add(X1, X1, curve->p, num_words); + uECC_vli_rshift1(X1, num_words); + X1[num_words - 1] |= l_carry << (uECC_WORD_BITS - 1); + } else { + uECC_vli_rshift1(X1, num_words); + } + /* t1 = 3/2*(x1^2 - z1^4) = B */ + + uECC_vli_modSquare_fast(Z1, X1, curve); /* t3 = B^2 */ + uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - A */ + uECC_vli_modSub(Z1, Z1, t5, curve->p, num_words); /* t3 = B^2 - 2A = x3 */ + uECC_vli_modSub(t5, t5, Z1, curve->p, num_words); /* t5 = A - x3 */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = B * (A - x3) */ + uECC_vli_modSub(t4, X1, t4, curve->p, + num_words); /* t4 = B * (A - x3) - y1^4 = y3 */ + + uECC_vli_set(X1, Z1, num_words); + uECC_vli_set(Z1, Y1, num_words); + uECC_vli_set(Y1, t4, num_words); +} + +/* Computes result = x^3 + ax + b. result must not overlap x. */ +static void x_side_default(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve) { + uECC_word_t _3[uECC_MAX_WORDS] = {3}; /* -a = 3 */ + wordcount_t num_words = curve->num_words; + + uECC_vli_modSquare_fast(result, x, curve); /* r = x^2 */ + uECC_vli_modSub(result, result, _3, curve->p, num_words); /* r = x^2 - 3 */ + uECC_vli_modMult_fast(result, result, x, curve); /* r = x^3 - 3x */ + uECC_vli_modAdd(result, result, curve->b, curve->p, + num_words); /* r = x^3 - 3x + b */ +} +#endif /* uECC_SUPPORTS_secp... 
*/ + +#if uECC_SUPPORT_COMPRESSED_POINT +#if uECC_SUPPORTS_secp160r1 || uECC_SUPPORTS_secp192r1 || \ + uECC_SUPPORTS_secp256r1 || uECC_SUPPORTS_secp256k1 +/* Compute a = sqrt(a) (mod curve_p). */ +static void mod_sqrt_default(uECC_word_t *a, uECC_Curve curve) { + bitcount_t i; + uECC_word_t p1[uECC_MAX_WORDS] = {1}; + uECC_word_t l_result[uECC_MAX_WORDS] = {1}; + wordcount_t num_words = curve->num_words; + + /* When curve->p == 3 (mod 4), we can compute + sqrt(a) = a^((curve->p + 1) / 4) (mod curve->p). */ + uECC_vli_add(p1, curve->p, p1, num_words); /* p1 = curve_p + 1 */ + for (i = uECC_vli_numBits(p1, num_words) - 1; i > 1; --i) { + uECC_vli_modSquare_fast(l_result, l_result, curve); + if (uECC_vli_testBit(p1, i)) { + uECC_vli_modMult_fast(l_result, l_result, a, curve); + } + } + uECC_vli_set(a, l_result, num_words); +} +#endif /* uECC_SUPPORTS_secp... */ +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +#if uECC_SUPPORTS_secp160r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp160r1 = { + num_words_secp160r1, + num_bytes_secp160r1, + 161, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, 7F, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_4(FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(57, 22, 75, CA, D3, AE, 27, F9), + BYTES_TO_WORDS_8(C8, F4, 01, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 01, 00, 00, 00)}, + {BYTES_TO_WORDS_8(82, FC, CB, 13, B9, 8B, C3, 68), + BYTES_TO_WORDS_8(89, 69, 64, 46, 28, 73, F5, 8E), + BYTES_TO_WORDS_4(68, B5, 96, 4A), + + BYTES_TO_WORDS_8(32, FB, C5, 7A, 37, 51, 23, 04), + BYTES_TO_WORDS_8(12, C9, DC, 59, 7D, 94, 68, 31), + BYTES_TO_WORDS_4(55, 28, A6, 23)}, + {BYTES_TO_WORDS_8(45, FA, 65, C5, AD, D4, D4, 81), + BYTES_TO_WORDS_8(9F, F8, AC, 65, 8B, 7A, BD, 54), + BYTES_TO_WORDS_4(FC, BE, 97, 1C)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + 
&mod_sqrt_default, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp160r1 +#endif +}; + +uECC_Curve uECC_secp160r1(void) { + return &curve_secp160r1; +} + +#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) +/* Computes result = product % curve_p + see http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf page 354 + + Note that this only works if log2(omega) < log2(p) / 2 */ +static void omega_mult_secp160r1(uECC_word_t *result, const uECC_word_t *right); +#if uECC_WORD_SIZE == 8 +static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { + uECC_word_t tmp[2 * num_words_secp160r1]; + uECC_word_t copy; + + uECC_vli_clear(tmp, num_words_secp160r1); + uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1); + + omega_mult_secp160r1(tmp, + product + num_words_secp160r1 - 1); /* (Rq, q) = q * c */ + + product[num_words_secp160r1 - 1] &= 0xffffffff; + copy = tmp[num_words_secp160r1 - 1]; + tmp[num_words_secp160r1 - 1] &= 0xffffffff; + uECC_vli_add(result, product, tmp, num_words_secp160r1); /* (C, r) = r + q */ + uECC_vli_clear(product, num_words_secp160r1); + tmp[num_words_secp160r1 - 1] = copy; + omega_mult_secp160r1(product, tmp + num_words_secp160r1 - 1); /* Rq*c */ + uECC_vli_add(result, result, product, + num_words_secp160r1); /* (C1, r) = r + Rq*c */ + + while (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > + 0) { + uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1); + } +} + +static void omega_mult_secp160r1(uint64_t *result, const uint64_t *right) { + uint32_t carry; + unsigned i; + + /* Multiply by (2^31 + 1). 
*/ + carry = 0; + for (i = 0; i < num_words_secp160r1; ++i) { + uint64_t tmp = (right[i] >> 32) | (right[i + 1] << 32); + result[i] = (tmp << 31) + tmp + carry; + carry = (tmp >> 33) + (result[i] < tmp || (carry && result[i] == tmp)); + } + result[i] = carry; +} +#else +static void vli_mmod_fast_secp160r1(uECC_word_t *result, uECC_word_t *product) { + uECC_word_t tmp[2 * num_words_secp160r1]; + uECC_word_t carry; + + uECC_vli_clear(tmp, num_words_secp160r1); + uECC_vli_clear(tmp + num_words_secp160r1, num_words_secp160r1); + + omega_mult_secp160r1(tmp, + product + num_words_secp160r1); /* (Rq, q) = q * c */ + + carry = uECC_vli_add(result, product, tmp, + num_words_secp160r1); /* (C, r) = r + q */ + uECC_vli_clear(product, num_words_secp160r1); + omega_mult_secp160r1(product, tmp + num_words_secp160r1); /* Rq*c */ + carry += uECC_vli_add(result, result, product, + num_words_secp160r1); /* (C1, r) = r + Rq*c */ + + while (carry > 0) { + --carry; + uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1); + } + if (uECC_vli_cmp_unsafe(result, curve_secp160r1.p, num_words_secp160r1) > 0) { + uECC_vli_sub(result, result, curve_secp160r1.p, num_words_secp160r1); + } +} +#endif + +#if uECC_WORD_SIZE == 1 +static void omega_mult_secp160r1(uint8_t *result, const uint8_t *right) { + uint8_t carry; + uint8_t i; + + /* Multiply by (2^31 + 1). */ + uECC_vli_set(result + 4, right, num_words_secp160r1); /* 2^32 */ + uECC_vli_rshift1(result + 4, num_words_secp160r1); /* 2^31 */ + result[3] = right[0] << 7; /* get last bit from shift */ + + carry = + uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */ + for (i = num_words_secp160r1; carry; ++i) { + uint16_t sum = (uint16_t) result[i] + carry; + result[i] = (uint8_t) sum; + carry = sum >> 8; + } +} +#elif uECC_WORD_SIZE == 4 +static void omega_mult_secp160r1(uint32_t *result, const uint32_t *right) { + uint32_t carry; + unsigned i; + + /* Multiply by (2^31 + 1). 
*/ + uECC_vli_set(result + 1, right, num_words_secp160r1); /* 2^32 */ + uECC_vli_rshift1(result + 1, num_words_secp160r1); /* 2^31 */ + result[0] = right[0] << 31; /* get last bit from shift */ + + carry = + uECC_vli_add(result, result, right, num_words_secp160r1); /* 2^31 + 1 */ + for (i = num_words_secp160r1; carry; ++i) { + uint64_t sum = (uint64_t) result[i] + carry; + result[i] = (uint32_t) sum; + carry = sum >> 32; + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp160r1) */ + +#endif /* uECC_SUPPORTS_secp160r1 */ + +#if uECC_SUPPORTS_secp192r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp192r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp192r1 = { + num_words_secp192r1, + num_bytes_secp192r1, + 192, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(31, 28, D2, B4, B1, C9, 6B, 14), + BYTES_TO_WORDS_8(36, F8, DE, 99, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(12, 10, FF, 82, FD, 0A, FF, F4), + BYTES_TO_WORDS_8(00, 88, A1, 43, EB, 20, BF, 7C), + BYTES_TO_WORDS_8(F6, 90, 30, B0, 0E, A8, 8D, 18), + + BYTES_TO_WORDS_8(11, 48, 79, 1E, A1, 77, F9, 73), + BYTES_TO_WORDS_8(D5, CD, 24, 6B, ED, 11, 10, 63), + BYTES_TO_WORDS_8(78, DA, C8, FF, 95, 2B, 19, 07)}, + {BYTES_TO_WORDS_8(B1, B9, 46, C1, EC, DE, B8, FE), + BYTES_TO_WORDS_8(49, 30, 24, 72, AB, E9, A7, 0F), + BYTES_TO_WORDS_8(E7, 80, 9C, E5, 19, 05, 21, 64)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp192r1 +#endif +}; + +uECC_Curve uECC_secp192r1(void) { + return &curve_secp192r1; +} + +#if (uECC_OPTIMIZATION_LEVEL > 0) +/* Computes result = product % curve_p. 
+ See algorithm 5 and 6 from http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf + */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp192r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp192r1]; + uint8_t carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[24], num_words_secp192r1); + carry = uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = tmp[2] = tmp[3] = tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] = product[24]; + tmp[9] = product[25]; + tmp[10] = product[26]; + tmp[11] = product[27]; + tmp[12] = product[28]; + tmp[13] = product[29]; + tmp[14] = product[30]; + tmp[15] = product[31]; + tmp[16] = product[32]; + tmp[17] = product[33]; + tmp[18] = product[34]; + tmp[19] = product[35]; + tmp[20] = product[36]; + tmp[21] = product[37]; + tmp[22] = product[38]; + tmp[23] = product[39]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[8] = product[40]; + tmp[1] = tmp[9] = product[41]; + tmp[2] = tmp[10] = product[42]; + tmp[3] = tmp[11] = product[43]; + tmp[4] = tmp[12] = product[44]; + tmp[5] = tmp[13] = product[45]; + tmp[6] = tmp[14] = product[46]; + tmp[7] = tmp[15] = product[47]; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = tmp[20] = tmp[21] = tmp[22] = + tmp[23] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp192r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp192r1]; + int carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[6], num_words_secp192r1); + carry = uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = 0; + tmp[2] = product[6]; + tmp[3] = product[7]; + tmp[4] = 
product[8]; + tmp[5] = product[9]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[2] = product[10]; + tmp[1] = tmp[3] = product[11]; + tmp[4] = tmp[5] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#else +static void vli_mmod_fast_secp192r1(uint64_t *result, uint64_t *product) { + uint64_t tmp[num_words_secp192r1]; + int carry; + + uECC_vli_set(result, product, num_words_secp192r1); + + uECC_vli_set(tmp, &product[3], num_words_secp192r1); + carry = (int) uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = 0; + tmp[1] = product[3]; + tmp[2] = product[4]; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + tmp[0] = tmp[1] = product[5]; + tmp[2] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp192r1); + + while (carry || uECC_vli_cmp_unsafe(curve_secp192r1.p, result, + num_words_secp192r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp192r1.p, num_words_secp192r1); + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */ + +#endif /* uECC_SUPPORTS_secp192r1 */ + +#if uECC_SUPPORTS_secp224r1 + +#if uECC_SUPPORT_COMPRESSED_POINT +static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve); +#endif +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp224r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp224r1 = { + num_words_secp224r1, + num_bytes_secp224r1, + 224, /* num_n_bits */ + {BYTES_TO_WORDS_8(01, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_4(FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(3D, 2A, 5C, 5C, 45, 29, DD, 13), + BYTES_TO_WORDS_8(3E, F0, B8, E0, A2, 16, FF, 
FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_4(FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(21, 1D, 5C, 11, D6, 80, 32, 34), + BYTES_TO_WORDS_8(22, 11, C2, 56, D3, C1, 03, 4A), + BYTES_TO_WORDS_8(B9, 90, 13, 32, 7F, BF, B4, 6B), + BYTES_TO_WORDS_4(BD, 0C, 0E, B7), + + BYTES_TO_WORDS_8(34, 7E, 00, 85, 99, 81, D5, 44), + BYTES_TO_WORDS_8(64, 47, 07, 5A, A0, 75, 43, CD), + BYTES_TO_WORDS_8(E6, DF, 22, 4C, FB, 23, F7, B5), + BYTES_TO_WORDS_4(88, 63, 37, BD)}, + {BYTES_TO_WORDS_8(B4, FF, 55, 23, 43, 39, 0B, 27), + BYTES_TO_WORDS_8(BA, D8, BF, D7, B7, B0, 44, 50), + BYTES_TO_WORDS_8(56, 32, 41, F5, AB, B3, 04, 0C), + BYTES_TO_WORDS_4(85, 0A, 05, B4)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_secp224r1, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp224r1 +#endif +}; + +uECC_Curve uECC_secp224r1(void) { + return &curve_secp224r1; +} + +#if uECC_SUPPORT_COMPRESSED_POINT +/* Routine 3.2.4 RS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +static void mod_sqrt_secp224r1_rs(uECC_word_t *d1, uECC_word_t *e1, + uECC_word_t *f1, const uECC_word_t *d0, + const uECC_word_t *e0, + const uECC_word_t *f0) { + uECC_word_t t[num_words_secp224r1]; + + uECC_vli_modSquare_fast(t, d0, &curve_secp224r1); /* t <-- d0 ^ 2 */ + uECC_vli_modMult_fast(e1, d0, e0, &curve_secp224r1); /* e1 <-- d0 * e0 */ + uECC_vli_modAdd(d1, t, f0, curve_secp224r1.p, + num_words_secp224r1); /* d1 <-- t + f0 */ + uECC_vli_modAdd(e1, e1, e1, curve_secp224r1.p, + num_words_secp224r1); /* e1 <-- e1 + e1 */ + uECC_vli_modMult_fast(f1, t, f0, &curve_secp224r1); /* f1 <-- t * f0 */ + uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, + num_words_secp224r1); /* f1 <-- f1 + f1 */ + uECC_vli_modAdd(f1, f1, f1, curve_secp224r1.p, + num_words_secp224r1); /* f1 <-- f1 + f1 */ +} + +/* Routine 3.2.5 RSS; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +static void mod_sqrt_secp224r1_rss(uECC_word_t *d1, uECC_word_t *e1, + 
uECC_word_t *f1, const uECC_word_t *d0, + const uECC_word_t *e0, const uECC_word_t *f0, + const bitcount_t j) { + bitcount_t i; + + uECC_vli_set(d1, d0, num_words_secp224r1); /* d1 <-- d0 */ + uECC_vli_set(e1, e0, num_words_secp224r1); /* e1 <-- e0 */ + uECC_vli_set(f1, f0, num_words_secp224r1); /* f1 <-- f0 */ + for (i = 1; i <= j; i++) { + mod_sqrt_secp224r1_rs(d1, e1, f1, d1, e1, f1); /* RS (d1,e1,f1,d1,e1,f1) */ + } +} + +/* Routine 3.2.6 RM; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +static void mod_sqrt_secp224r1_rm(uECC_word_t *d2, uECC_word_t *e2, + uECC_word_t *f2, const uECC_word_t *c, + const uECC_word_t *d0, const uECC_word_t *e0, + const uECC_word_t *d1, + const uECC_word_t *e1) { + uECC_word_t t1[num_words_secp224r1]; + uECC_word_t t2[num_words_secp224r1]; + + uECC_vli_modMult_fast(t1, e0, e1, &curve_secp224r1); /* t1 <-- e0 * e1 */ + uECC_vli_modMult_fast(t1, t1, c, &curve_secp224r1); /* t1 <-- t1 * c */ + /* t1 <-- p - t1 */ + uECC_vli_modSub(t1, curve_secp224r1.p, t1, curve_secp224r1.p, + num_words_secp224r1); + uECC_vli_modMult_fast(t2, d0, d1, &curve_secp224r1); /* t2 <-- d0 * d1 */ + uECC_vli_modAdd(t2, t2, t1, curve_secp224r1.p, + num_words_secp224r1); /* t2 <-- t2 + t1 */ + uECC_vli_modMult_fast(t1, d0, e1, &curve_secp224r1); /* t1 <-- d0 * e1 */ + uECC_vli_modMult_fast(e2, d1, e0, &curve_secp224r1); /* e2 <-- d1 * e0 */ + uECC_vli_modAdd(e2, e2, t1, curve_secp224r1.p, + num_words_secp224r1); /* e2 <-- e2 + t1 */ + uECC_vli_modSquare_fast(f2, e2, &curve_secp224r1); /* f2 <-- e2^2 */ + uECC_vli_modMult_fast(f2, f2, c, &curve_secp224r1); /* f2 <-- f2 * c */ + /* f2 <-- p - f2 */ + uECC_vli_modSub(f2, curve_secp224r1.p, f2, curve_secp224r1.p, + num_words_secp224r1); + uECC_vli_set(d2, t2, num_words_secp224r1); /* d2 <-- t2 */ +} + +/* Routine 3.2.7 RP; from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +static void mod_sqrt_secp224r1_rp(uECC_word_t *d1, uECC_word_t *e1, + uECC_word_t *f1, const uECC_word_t *c, + const uECC_word_t 
*r) { + wordcount_t i; + wordcount_t pow2i = 1; + uECC_word_t d0[num_words_secp224r1]; + uECC_word_t e0[num_words_secp224r1] = {1}; /* e0 <-- 1 */ + uECC_word_t f0[num_words_secp224r1]; + + uECC_vli_set(d0, r, num_words_secp224r1); /* d0 <-- r */ + /* f0 <-- p - c */ + uECC_vli_modSub(f0, curve_secp224r1.p, c, curve_secp224r1.p, + num_words_secp224r1); + for (i = 0; i <= 6; i++) { + mod_sqrt_secp224r1_rss(d1, e1, f1, d0, e0, f0, + pow2i); /* RSS (d1,e1,f1,d0,e0,f0,2^i) */ + mod_sqrt_secp224r1_rm(d1, e1, f1, c, d1, e1, d0, + e0); /* RM (d1,e1,f1,c,d1,e1,d0,e0) */ + uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */ + uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */ + uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */ + pow2i *= 2; + } +} + +/* Compute a = sqrt(a) (mod curve_p). */ +/* Routine 3.2.8 mp_mod_sqrt_224; from + * http://www.nsa.gov/ia/_files/nist-routines.pdf */ +static void mod_sqrt_secp224r1(uECC_word_t *a, uECC_Curve curve) { + (void) curve; + bitcount_t i; + uECC_word_t e1[num_words_secp224r1]; + uECC_word_t f1[num_words_secp224r1]; + uECC_word_t d0[num_words_secp224r1]; + uECC_word_t e0[num_words_secp224r1]; + uECC_word_t f0[num_words_secp224r1]; + uECC_word_t d1[num_words_secp224r1]; + + /* s = a; using constant instead of random value */ + mod_sqrt_secp224r1_rp(d0, e0, f0, a, a); /* RP (d0, e0, f0, c, s) */ + mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, + f0); /* RS (d1, e1, f1, d0, e0, f0) */ + for (i = 1; i <= 95; i++) { + uECC_vli_set(d0, d1, num_words_secp224r1); /* d0 <-- d1 */ + uECC_vli_set(e0, e1, num_words_secp224r1); /* e0 <-- e1 */ + uECC_vli_set(f0, f1, num_words_secp224r1); /* f0 <-- f1 */ + mod_sqrt_secp224r1_rs(d1, e1, f1, d0, e0, + f0); /* RS (d1, e1, f1, d0, e0, f0) */ + if (uECC_vli_isZero(d1, num_words_secp224r1)) { /* if d1 == 0 */ + break; + } + } + uECC_vli_modInv(f1, e0, curve_secp224r1.p, + num_words_secp224r1); /* f1 <-- 1 / e0 */ + uECC_vli_modMult_fast(a, d0, f1, &curve_secp224r1); /* a <-- d0 / e0 
*/ +} +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +#if (uECC_OPTIMIZATION_LEVEL > 0) +/* Computes result = product % curve_p + from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp224r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp224r1]; + int8_t carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0; + tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[28]; + tmp[13] = product[29]; + tmp[14] = product[30]; + tmp[15] = product[31]; + tmp[16] = product[32]; + tmp[17] = product[33]; + tmp[18] = product[34]; + tmp[19] = product[35]; + tmp[20] = product[36]; + tmp[21] = product[37]; + tmp[22] = product[38]; + tmp[23] = product[39]; + tmp[24] = product[40]; + tmp[25] = product[41]; + tmp[26] = product[42]; + tmp[27] = product[43]; + carry = uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[12] = product[44]; + tmp[13] = product[45]; + tmp[14] = product[46]; + tmp[15] = product[47]; + tmp[16] = product[48]; + tmp[17] = product[49]; + tmp[18] = product[50]; + tmp[19] = product[51]; + tmp[20] = product[52]; + tmp[21] = product[53]; + tmp[22] = product[54]; + tmp[23] = product[55]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = product[28]; + tmp[1] = product[29]; + tmp[2] = product[30]; + tmp[3] = product[31]; + tmp[4] = product[32]; + tmp[5] = product[33]; + tmp[6] = product[34]; + tmp[7] = product[35]; + tmp[8] = product[36]; + tmp[9] = product[37]; + tmp[10] = product[38]; + tmp[11] = product[39]; + tmp[12] = product[40]; + tmp[13] = product[41]; + tmp[14] = product[42]; + tmp[15] = product[43]; + tmp[16] = product[44]; + tmp[17] = product[45]; + tmp[18] = product[46]; + tmp[19] = product[47]; + tmp[20] = product[48]; + tmp[21] = product[49]; + tmp[22] = 
product[50]; + tmp[23] = product[51]; + tmp[24] = product[52]; + tmp[25] = product[53]; + tmp[26] = product[54]; + tmp[27] = product[55]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = product[44]; + tmp[1] = product[45]; + tmp[2] = product[46]; + tmp[3] = product[47]; + tmp[4] = product[48]; + tmp[5] = product[49]; + tmp[6] = product[50]; + tmp[7] = product[51]; + tmp[8] = product[52]; + tmp[9] = product[53]; + tmp[10] = product[54]; + tmp[11] = product[55]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp224r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp224r1]; + int carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[3] = product[7]; + tmp[4] = product[8]; + tmp[5] = product[9]; + tmp[6] = product[10]; + carry = uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[3] = product[11]; + tmp[4] = product[12]; + tmp[5] = product[13]; + tmp[6] = 0; + carry += uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = product[7]; + tmp[1] = product[8]; + tmp[2] = product[9]; + tmp[3] = product[10]; + tmp[4] = product[11]; + tmp[5] = product[12]; + tmp[6] = product[13]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = product[11]; + tmp[1] = product[12]; + tmp[2] = 
product[13]; + tmp[3] = tmp[4] = tmp[5] = tmp[6] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#else +static void vli_mmod_fast_secp224r1(uint64_t *result, uint64_t *product) { + uint64_t tmp[num_words_secp224r1]; + int carry = 0; + + /* t */ + uECC_vli_set(result, product, num_words_secp224r1); + result[num_words_secp224r1 - 1] &= 0xffffffff; + + /* s1 */ + tmp[0] = 0; + tmp[1] = product[3] & 0xffffffff00000000ull; + tmp[2] = product[4]; + tmp[3] = product[5] & 0xffffffff; + uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* s2 */ + tmp[1] = product[5] & 0xffffffff00000000ull; + tmp[2] = product[6]; + tmp[3] = 0; + uECC_vli_add(result, result, tmp, num_words_secp224r1); + + /* d1 */ + tmp[0] = (product[3] >> 32) | (product[4] << 32); + tmp[1] = (product[4] >> 32) | (product[5] << 32); + tmp[2] = (product[5] >> 32) | (product[6] << 32); + tmp[3] = product[6] >> 32; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + /* d2 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = product[6] >> 32; + tmp[2] = tmp[3] = 0; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp224r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp224r1.p, num_words_secp224r1); + } while (carry < 0); + } else { + while (uECC_vli_cmp_unsafe(curve_secp224r1.p, result, + num_words_secp224r1) != 1) { + uECC_vli_sub(result, result, curve_secp224r1.p, num_words_secp224r1); + } + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0) */ + +#endif /* uECC_SUPPORTS_secp224r1 */ + +#if uECC_SUPPORTS_secp256r1 + +#if (uECC_OPTIMIZATION_LEVEL > 0) 
+static void vli_mmod_fast_secp256r1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp256r1 = { + num_words_secp256r1, + num_bytes_secp256r1, + 256, /* num_n_bits */ + {BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(01, 00, 00, 00, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(51, 25, 63, FC, C2, CA, B9, F3), + BYTES_TO_WORDS_8(84, 9E, 17, A7, AD, FA, E6, BC), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(00, 00, 00, 00, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(96, C2, 98, D8, 45, 39, A1, F4), + BYTES_TO_WORDS_8(A0, 33, EB, 2D, 81, 7D, 03, 77), + BYTES_TO_WORDS_8(F2, 40, A4, 63, E5, E6, BC, F8), + BYTES_TO_WORDS_8(47, 42, 2C, E1, F2, D1, 17, 6B), + + BYTES_TO_WORDS_8(F5, 51, BF, 37, 68, 40, B6, CB), + BYTES_TO_WORDS_8(CE, 5E, 31, 6B, 57, 33, CE, 2B), + BYTES_TO_WORDS_8(16, 9E, 0F, 7C, 4A, EB, E7, 8E), + BYTES_TO_WORDS_8(9B, 7F, 1A, FE, E2, 42, E3, 4F)}, + {BYTES_TO_WORDS_8(4B, 60, D2, 27, 3E, 3C, CE, 3B), + BYTES_TO_WORDS_8(F6, B0, 53, CC, B0, 06, 1D, 65), + BYTES_TO_WORDS_8(BC, 86, 98, 76, 55, BD, EB, B3), + BYTES_TO_WORDS_8(E7, 93, 3A, AA, D8, 35, C6, 5A)}, + &double_jacobian_default, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_default, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp256r1 +#endif +}; + +uECC_Curve uECC_secp256r1(void) { + return &curve_secp256r1; +} + +#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) +/* Computes result = product % curve_p + from http://www.nsa.gov/ia/_files/nist-routines.pdf */ +#if uECC_WORD_SIZE == 1 +static void vli_mmod_fast_secp256r1(uint8_t *result, uint8_t *product) { + uint8_t tmp[num_words_secp256r1]; + int8_t carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp256r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = tmp[3] = 0; + tmp[4] = tmp[5] = tmp[6] = tmp[7] = 0; + tmp[8] 
= tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[44]; + tmp[13] = product[45]; + tmp[14] = product[46]; + tmp[15] = product[47]; + tmp[16] = product[48]; + tmp[17] = product[49]; + tmp[18] = product[50]; + tmp[19] = product[51]; + tmp[20] = product[52]; + tmp[21] = product[53]; + tmp[22] = product[54]; + tmp[23] = product[55]; + tmp[24] = product[56]; + tmp[25] = product[57]; + tmp[26] = product[58]; + tmp[27] = product[59]; + tmp[28] = product[60]; + tmp[29] = product[61]; + tmp[30] = product[62]; + tmp[31] = product[63]; + carry = uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s2 */ + tmp[12] = product[48]; + tmp[13] = product[49]; + tmp[14] = product[50]; + tmp[15] = product[51]; + tmp[16] = product[52]; + tmp[17] = product[53]; + tmp[18] = product[54]; + tmp[19] = product[55]; + tmp[20] = product[56]; + tmp[21] = product[57]; + tmp[22] = product[58]; + tmp[23] = product[59]; + tmp[24] = product[60]; + tmp[25] = product[61]; + tmp[26] = product[62]; + tmp[27] = product[63]; + tmp[28] = tmp[29] = tmp[30] = tmp[31] = 0; + carry += uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s3 */ + tmp[0] = product[32]; + tmp[1] = product[33]; + tmp[2] = product[34]; + tmp[3] = product[35]; + tmp[4] = product[36]; + tmp[5] = product[37]; + tmp[6] = product[38]; + tmp[7] = product[39]; + tmp[8] = product[40]; + tmp[9] = product[41]; + tmp[10] = product[42]; + tmp[11] = product[43]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = product[56]; + tmp[25] = product[57]; + tmp[26] = product[58]; + tmp[27] = product[59]; + tmp[28] = product[60]; + tmp[29] = product[61]; + tmp[30] = product[62]; + tmp[31] = product[63]; + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s4 */ + tmp[0] = product[36]; + tmp[1] = 
product[37]; + tmp[2] = product[38]; + tmp[3] = product[39]; + tmp[4] = product[40]; + tmp[5] = product[41]; + tmp[6] = product[42]; + tmp[7] = product[43]; + tmp[8] = product[44]; + tmp[9] = product[45]; + tmp[10] = product[46]; + tmp[11] = product[47]; + tmp[12] = product[52]; + tmp[13] = product[53]; + tmp[14] = product[54]; + tmp[15] = product[55]; + tmp[16] = product[56]; + tmp[17] = product[57]; + tmp[18] = product[58]; + tmp[19] = product[59]; + tmp[20] = product[60]; + tmp[21] = product[61]; + tmp[22] = product[62]; + tmp[23] = product[63]; + tmp[24] = product[52]; + tmp[25] = product[53]; + tmp[26] = product[54]; + tmp[27] = product[55]; + tmp[28] = product[32]; + tmp[29] = product[33]; + tmp[30] = product[34]; + tmp[31] = product[35]; + carry += uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = product[44]; + tmp[1] = product[45]; + tmp[2] = product[46]; + tmp[3] = product[47]; + tmp[4] = product[48]; + tmp[5] = product[49]; + tmp[6] = product[50]; + tmp[7] = product[51]; + tmp[8] = product[52]; + tmp[9] = product[53]; + tmp[10] = product[54]; + tmp[11] = product[55]; + tmp[12] = tmp[13] = tmp[14] = tmp[15] = 0; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = tmp[22] = tmp[23] = 0; + tmp[24] = product[32]; + tmp[25] = product[33]; + tmp[26] = product[34]; + tmp[27] = product[35]; + tmp[28] = product[40]; + tmp[29] = product[41]; + tmp[30] = product[42]; + tmp[31] = product[43]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[48]; + tmp[1] = product[49]; + tmp[2] = product[50]; + tmp[3] = product[51]; + tmp[4] = product[52]; + tmp[5] = product[53]; + tmp[6] = product[54]; + tmp[7] = product[55]; + tmp[8] = product[56]; + tmp[9] = product[57]; + tmp[10] = product[58]; + tmp[11] = product[59]; + tmp[12] = product[60]; + tmp[13] = product[61]; + tmp[14] = product[62]; + tmp[15] = product[63]; + tmp[16] = tmp[17] = tmp[18] = tmp[19] = 0; + tmp[20] = tmp[21] = 
tmp[22] = tmp[23] = 0; + tmp[24] = product[36]; + tmp[25] = product[37]; + tmp[26] = product[38]; + tmp[27] = product[39]; + tmp[28] = product[44]; + tmp[29] = product[45]; + tmp[30] = product[46]; + tmp[31] = product[47]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 */ + tmp[0] = product[52]; + tmp[1] = product[53]; + tmp[2] = product[54]; + tmp[3] = product[55]; + tmp[4] = product[56]; + tmp[5] = product[57]; + tmp[6] = product[58]; + tmp[7] = product[59]; + tmp[8] = product[60]; + tmp[9] = product[61]; + tmp[10] = product[62]; + tmp[11] = product[63]; + tmp[12] = product[32]; + tmp[13] = product[33]; + tmp[14] = product[34]; + tmp[15] = product[35]; + tmp[16] = product[36]; + tmp[17] = product[37]; + tmp[18] = product[38]; + tmp[19] = product[39]; + tmp[20] = product[40]; + tmp[21] = product[41]; + tmp[22] = product[42]; + tmp[23] = product[43]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + tmp[28] = product[48]; + tmp[29] = product[49]; + tmp[30] = product[50]; + tmp[31] = product[51]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d4 */ + tmp[0] = product[56]; + tmp[1] = product[57]; + tmp[2] = product[58]; + tmp[3] = product[59]; + tmp[4] = product[60]; + tmp[5] = product[61]; + tmp[6] = product[62]; + tmp[7] = product[63]; + tmp[8] = tmp[9] = tmp[10] = tmp[11] = 0; + tmp[12] = product[36]; + tmp[13] = product[37]; + tmp[14] = product[38]; + tmp[15] = product[39]; + tmp[16] = product[40]; + tmp[17] = product[41]; + tmp[18] = product[42]; + tmp[19] = product[43]; + tmp[20] = product[44]; + tmp[21] = product[45]; + tmp[22] = product[46]; + tmp[23] = product[47]; + tmp[24] = tmp[25] = tmp[26] = tmp[27] = 0; + tmp[28] = product[52]; + tmp[29] = product[53]; + tmp[30] = product[54]; + tmp[31] = product[55]; + carry -= uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + if (carry < 0) { + do { + carry += + uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1); + } while (carry < 0); 
+ } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, + num_words_secp256r1) != 1) { + carry -= + uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1); + } + } +} +#elif uECC_WORD_SIZE == 4 +static void vli_mmod_fast_secp256r1(uint32_t *result, uint32_t *product) { + uint32_t tmp[num_words_secp256r1]; + int carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp256r1); + + /* s1 */ + tmp[0] = tmp[1] = tmp[2] = 0; + tmp[3] = product[11]; + tmp[4] = product[12]; + tmp[5] = product[13]; + tmp[6] = product[14]; + tmp[7] = product[15]; + carry = (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s2 */ + tmp[3] = product[12]; + tmp[4] = product[13]; + tmp[5] = product[14]; + tmp[6] = product[15]; + tmp[7] = 0; + carry += (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s3 */ + tmp[0] = product[8]; + tmp[1] = product[9]; + tmp[2] = product[10]; + tmp[3] = tmp[4] = tmp[5] = 0; + tmp[6] = product[14]; + tmp[7] = product[15]; + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s4 */ + tmp[0] = product[9]; + tmp[1] = product[10]; + tmp[2] = product[11]; + tmp[3] = product[13]; + tmp[4] = product[14]; + tmp[5] = product[15]; + tmp[6] = product[13]; + tmp[7] = product[8]; + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = product[11]; + tmp[1] = product[12]; + tmp[2] = product[13]; + tmp[3] = tmp[4] = tmp[5] = 0; + tmp[6] = product[8]; + tmp[7] = product[10]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[12]; + tmp[1] = product[13]; + tmp[2] = product[14]; + tmp[3] = product[15]; + tmp[4] = tmp[5] = 0; + tmp[6] = product[9]; + tmp[7] = product[11]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 
*/ + tmp[0] = product[13]; + tmp[1] = product[14]; + tmp[2] = product[15]; + tmp[3] = product[8]; + tmp[4] = product[9]; + tmp[5] = product[10]; + tmp[6] = 0; + tmp[7] = product[12]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d4 */ + tmp[0] = product[14]; + tmp[1] = product[15]; + tmp[2] = 0; + tmp[3] = product[9]; + tmp[4] = product[10]; + tmp[5] = product[11]; + tmp[6] = 0; + tmp[7] = product[13]; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + if (carry < 0) { + do { + carry += + (int) uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, + num_words_secp256r1) != 1) { + carry -= + (int) uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1); + } + } +} +#else +static void vli_mmod_fast_secp256r1(uint64_t *result, uint64_t *product) { + uint64_t tmp[num_words_secp256r1]; + int carry; + + /* t */ + uECC_vli_set(result, product, num_words_secp256r1); + + /* s1 */ + tmp[0] = 0; + tmp[1] = product[5] & 0xffffffff00000000U; + tmp[2] = product[6]; + tmp[3] = product[7]; + carry = (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s2 */ + tmp[1] = product[6] << 32; + tmp[2] = (product[6] >> 32) | (product[7] << 32); + tmp[3] = product[7] >> 32; + carry += (int) uECC_vli_add(tmp, tmp, tmp, num_words_secp256r1); + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s3 */ + tmp[0] = product[4]; + tmp[1] = product[5] & 0xffffffff; + tmp[2] = 0; + tmp[3] = product[7]; + carry += (int) uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* s4 */ + tmp[0] = (product[4] >> 32) | (product[5] << 32); + tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000U); + tmp[2] = product[7]; + tmp[3] = (product[6] >> 32) | (product[4] << 32); + carry += (int) 
uECC_vli_add(result, result, tmp, num_words_secp256r1); + + /* d1 */ + tmp[0] = (product[5] >> 32) | (product[6] << 32); + tmp[1] = (product[6] >> 32); + tmp[2] = 0; + tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d2 */ + tmp[0] = product[6]; + tmp[1] = product[7]; + tmp[2] = 0; + tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d3 */ + tmp[0] = (product[6] >> 32) | (product[7] << 32); + tmp[1] = (product[7] >> 32) | (product[4] << 32); + tmp[2] = (product[4] >> 32) | (product[5] << 32); + tmp[3] = (product[6] << 32); + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + /* d4 */ + tmp[0] = product[7]; + tmp[1] = product[4] & 0xffffffff00000000U; + tmp[2] = product[5]; + tmp[3] = product[6] & 0xffffffff00000000U; + carry -= (int) uECC_vli_sub(result, result, tmp, num_words_secp256r1); + + if (carry < 0) { + do { + carry += + (int) uECC_vli_add(result, result, curve_secp256r1.p, num_words_secp256r1); + } while (carry < 0); + } else { + while (carry || uECC_vli_cmp_unsafe(curve_secp256r1.p, result, + num_words_secp256r1) != 1) { + carry -= + (int) uECC_vli_sub(result, result, curve_secp256r1.p, num_words_secp256r1); + } + } +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256r1) */ + +#endif /* uECC_SUPPORTS_secp256r1 */ + +#if uECC_SUPPORTS_secp256k1 + +static void double_jacobian_secp256k1(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve); +static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve); +#if (uECC_OPTIMIZATION_LEVEL > 0) +static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product); +#endif + +static const struct uECC_Curve_t curve_secp256k1 = { + num_words_secp256k1, + num_bytes_secp256k1, + 256, /* num_n_bits */ + 
{BYTES_TO_WORDS_8(2F, FC, FF, FF, FE, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(41, 41, 36, D0, 8C, 5E, D2, BF), + BYTES_TO_WORDS_8(3B, A0, 48, AF, E6, DC, AE, BA), + BYTES_TO_WORDS_8(FE, FF, FF, FF, FF, FF, FF, FF), + BYTES_TO_WORDS_8(FF, FF, FF, FF, FF, FF, FF, FF)}, + {BYTES_TO_WORDS_8(98, 17, F8, 16, 5B, 81, F2, 59), + BYTES_TO_WORDS_8(D9, 28, CE, 2D, DB, FC, 9B, 02), + BYTES_TO_WORDS_8(07, 0B, 87, CE, 95, 62, A0, 55), + BYTES_TO_WORDS_8(AC, BB, DC, F9, 7E, 66, BE, 79), + + BYTES_TO_WORDS_8(B8, D4, 10, FB, 8F, D0, 47, 9C), + BYTES_TO_WORDS_8(19, 54, 85, A6, 48, B4, 17, FD), + BYTES_TO_WORDS_8(A8, 08, 11, 0E, FC, FB, A4, 5D), + BYTES_TO_WORDS_8(65, C4, A3, 26, 77, DA, 3A, 48)}, + {BYTES_TO_WORDS_8(07, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00), + BYTES_TO_WORDS_8(00, 00, 00, 00, 00, 00, 00, 00)}, + &double_jacobian_secp256k1, +#if uECC_SUPPORT_COMPRESSED_POINT + &mod_sqrt_default, +#endif + &x_side_secp256k1, +#if (uECC_OPTIMIZATION_LEVEL > 0) + &vli_mmod_fast_secp256k1 +#endif +}; + +uECC_Curve uECC_secp256k1(void) { + return &curve_secp256k1; +} + +/* Double in place */ +static void double_jacobian_secp256k1(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *Z1, uECC_Curve curve) { + /* t1 = X, t2 = Y, t3 = Z */ + uECC_word_t t4[num_words_secp256k1]; + uECC_word_t t5[num_words_secp256k1]; + + if (uECC_vli_isZero(Z1, num_words_secp256k1)) { + return; + } + + uECC_vli_modSquare_fast(t5, Y1, curve); /* t5 = y1^2 */ + uECC_vli_modMult_fast(t4, X1, t5, curve); /* t4 = x1*y1^2 = A */ + uECC_vli_modSquare_fast(X1, X1, curve); /* t1 = x1^2 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = y1^4 */ + uECC_vli_modMult_fast(Z1, Y1, Z1, curve); /* t3 = y1*z1 = z3 */ + + uECC_vli_modAdd(Y1, X1, X1, curve->p, num_words_secp256k1); /* t2 = 
2*x1^2 */ + uECC_vli_modAdd(Y1, Y1, X1, curve->p, num_words_secp256k1); /* t2 = 3*x1^2 */ + if (uECC_vli_testBit(Y1, 0)) { + uECC_word_t carry = uECC_vli_add(Y1, Y1, curve->p, num_words_secp256k1); + uECC_vli_rshift1(Y1, num_words_secp256k1); + Y1[num_words_secp256k1 - 1] |= carry << (uECC_WORD_BITS - 1); + } else { + uECC_vli_rshift1(Y1, num_words_secp256k1); + } + /* t2 = 3/2*(x1^2) = B */ + + uECC_vli_modSquare_fast(X1, Y1, curve); /* t1 = B^2 */ + uECC_vli_modSub(X1, X1, t4, curve->p, num_words_secp256k1); /* t1 = B^2 - A */ + uECC_vli_modSub(X1, X1, t4, curve->p, + num_words_secp256k1); /* t1 = B^2 - 2A = x3 */ + + uECC_vli_modSub(t4, t4, X1, curve->p, num_words_secp256k1); /* t4 = A - x3 */ + uECC_vli_modMult_fast(Y1, Y1, t4, curve); /* t2 = B * (A - x3) */ + uECC_vli_modSub(Y1, Y1, t5, curve->p, + num_words_secp256k1); /* t2 = B * (A - x3) - y1^4 = y3 */ +} + +/* Computes result = x^3 + b. result must not overlap x. */ +static void x_side_secp256k1(uECC_word_t *result, const uECC_word_t *x, + uECC_Curve curve) { + uECC_vli_modSquare_fast(result, x, curve); /* r = x^2 */ + uECC_vli_modMult_fast(result, result, x, curve); /* r = x^3 */ + uECC_vli_modAdd(result, result, curve->b, curve->p, + num_words_secp256k1); /* r = x^3 + b */ +} + +#if (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1) +static void omega_mult_secp256k1(uECC_word_t *result, const uECC_word_t *right); +static void vli_mmod_fast_secp256k1(uECC_word_t *result, uECC_word_t *product) { + uECC_word_t tmp[2 * num_words_secp256k1]; + uECC_word_t carry; + + uECC_vli_clear(tmp, num_words_secp256k1); + uECC_vli_clear(tmp + num_words_secp256k1, num_words_secp256k1); + + omega_mult_secp256k1(tmp, + product + num_words_secp256k1); /* (Rq, q) = q * c */ + + carry = uECC_vli_add(result, product, tmp, + num_words_secp256k1); /* (C, r) = r + q */ + uECC_vli_clear(product, num_words_secp256k1); + omega_mult_secp256k1(product, tmp + num_words_secp256k1); /* Rq*c */ + carry += uECC_vli_add(result, 
result, product, + num_words_secp256k1); /* (C1, r) = r + Rq*c */ + + while (carry > 0) { + --carry; + uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); + } + if (uECC_vli_cmp_unsafe(result, curve_secp256k1.p, num_words_secp256k1) > 0) { + uECC_vli_sub(result, result, curve_secp256k1.p, num_words_secp256k1); + } +} + +#if uECC_WORD_SIZE == 1 +static void omega_mult_secp256k1(uint8_t *result, const uint8_t *right) { + /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t k; + + /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + muladd(0xD1, right[0], &r0, &r1, &r2); + result[0] = r0; + r0 = r1; + r1 = r2; + /* r2 is still 0 */ + + for (k = 1; k < num_words_secp256k1; ++k) { + muladd(0x03, right[k - 1], &r0, &r1, &r2); + muladd(0xD1, right[k], &r0, &r1, &r2); + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + muladd(0x03, right[num_words_secp256k1 - 1], &r0, &r1, &r2); + result[num_words_secp256k1] = r0; + result[num_words_secp256k1 + 1] = r1; + /* add the 2^32 multiple */ + result[4 + num_words_secp256k1] = + uECC_vli_add(result + 4, result + 4, right, num_words_secp256k1); +} +#elif uECC_WORD_SIZE == 4 +static void omega_mult_secp256k1(uint32_t *result, const uint32_t *right) { + /* Multiply by (2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). */ + uint32_t carry = 0; + wordcount_t k; + + for (k = 0; k < num_words_secp256k1; ++k) { + uint64_t p = (uint64_t) 0x3D1 * right[k] + carry; + result[k] = (uint32_t) p; + carry = p >> 32; + } + result[num_words_secp256k1] = carry; + /* add the 2^32 multiple */ + result[1 + num_words_secp256k1] = + uECC_vli_add(result + 1, result + 1, right, num_words_secp256k1); +} +#else +static void omega_mult_secp256k1(uint64_t *result, const uint64_t *right) { + uECC_word_t r0 = 0; + uECC_word_t r1 = 0; + uECC_word_t r2 = 0; + wordcount_t k; + + /* Multiply by (2^32 + 2^9 + 2^8 + 2^7 + 2^6 + 2^4 + 1). 
*/ + for (k = 0; k < num_words_secp256k1; ++k) { + muladd(0x1000003D1ull, right[k], &r0, &r1, &r2); + result[k] = r0; + r0 = r1; + r1 = r2; + r2 = 0; + } + result[num_words_secp256k1] = r0; +} +#endif /* uECC_WORD_SIZE */ +#endif /* (uECC_OPTIMIZATION_LEVEL > 0 && !asm_mmod_fast_secp256k1) */ + +#endif /* uECC_SUPPORTS_secp256k1 */ + +#endif /* _UECC_CURVE_SPECIFIC_H_ */ + +/* Returns 1 if 'point' is the point at infinity, 0 otherwise. */ +#define EccPoint_isZero(point, curve) \ + uECC_vli_isZero((point), (wordcount_t) ((curve)->num_words * 2)) + +/* Point multiplication algorithm using Montgomery's ladder with co-Z +coordinates. From http://eprint.iacr.org/2011/338.pdf +*/ + +/* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */ +static void apply_z(uECC_word_t *X1, uECC_word_t *Y1, + const uECC_word_t *const Z, uECC_Curve curve) { + uECC_word_t t1[uECC_MAX_WORDS]; + + uECC_vli_modSquare_fast(t1, Z, curve); /* z^2 */ + uECC_vli_modMult_fast(X1, X1, t1, curve); /* x1 * z^2 */ + uECC_vli_modMult_fast(t1, t1, Z, curve); /* z^3 */ + uECC_vli_modMult_fast(Y1, Y1, t1, curve); /* y1 * z^3 */ +} + +/* P = (x1, y1) => 2P, (x2, y2) => P' */ +static void XYcZ_initial_double(uECC_word_t *X1, uECC_word_t *Y1, + uECC_word_t *X2, uECC_word_t *Y2, + const uECC_word_t *const initial_Z, + uECC_Curve curve) { + uECC_word_t z[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + if (initial_Z) { + uECC_vli_set(z, initial_Z, num_words); + } else { + uECC_vli_clear(z, num_words); + z[0] = 1; + } + + uECC_vli_set(X2, X1, num_words); + uECC_vli_set(Y2, Y1, num_words); + + apply_z(X1, Y1, z, curve); + curve->double_jacobian(X1, Y1, z, curve); + apply_z(X2, Y2, z, curve); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3) + or P => P', Q => P + Q +*/ +static void XYcZ_add(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *X2, + uECC_word_t *Y2, uECC_Curve curve) { + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + uECC_word_t t5[uECC_MAX_WORDS] = 
{0}; + wordcount_t num_words = curve->num_words; + + uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = (x2 - x1)^2 = A */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = x1*A = B */ + uECC_vli_modMult_fast(X2, X2, t5, curve); /* t3 = x2*A = C */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */ + uECC_vli_modSquare_fast(t5, Y2, curve); /* t5 = (y2 - y1)^2 = D */ + + uECC_vli_modSub(t5, t5, X1, curve->p, num_words); /* t5 = D - B */ + uECC_vli_modSub(t5, t5, X2, curve->p, num_words); /* t5 = D - B - C = x3 */ + uECC_vli_modSub(X2, X2, X1, curve->p, num_words); /* t3 = C - B */ + uECC_vli_modMult_fast(Y1, Y1, X2, curve); /* t2 = y1*(C - B) */ + uECC_vli_modSub(X2, X1, t5, curve->p, num_words); /* t3 = B - x3 */ + uECC_vli_modMult_fast(Y2, Y2, X2, curve); /* t4 = (y2 - y1)*(B - x3) */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y3 */ + + uECC_vli_set(X2, t5, num_words); +} + +/* Input P = (x1, y1, Z), Q = (x2, y2, Z) + Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3) + or P => P - Q, Q => P + Q +*/ +static void XYcZ_addC(uECC_word_t *X1, uECC_word_t *Y1, uECC_word_t *X2, + uECC_word_t *Y2, uECC_Curve curve) { + /* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */ + uECC_word_t t5[uECC_MAX_WORDS] = {0}; + uECC_word_t t6[uECC_MAX_WORDS]; + uECC_word_t t7[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + uECC_vli_modSub(t5, X2, X1, curve->p, num_words); /* t5 = x2 - x1 */ + uECC_vli_modSquare_fast(t5, t5, curve); /* t5 = (x2 - x1)^2 = A */ + uECC_vli_modMult_fast(X1, X1, t5, curve); /* t1 = x1*A = B */ + uECC_vli_modMult_fast(X2, X2, t5, curve); /* t3 = x2*A = C */ + uECC_vli_modAdd(t5, Y2, Y1, curve->p, num_words); /* t5 = y2 + y1 */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, num_words); /* t4 = y2 - y1 */ + + uECC_vli_modSub(t6, X2, X1, curve->p, num_words); /* t6 = C - B */ + uECC_vli_modMult_fast(Y1, Y1, t6, curve); /* t2 = y1 * (C - B) = E */ + 
uECC_vli_modAdd(t6, X1, X2, curve->p, num_words); /* t6 = B + C */ + uECC_vli_modSquare_fast(X2, Y2, curve); /* t3 = (y2 - y1)^2 = D */ + uECC_vli_modSub(X2, X2, t6, curve->p, num_words); /* t3 = D - (B + C) = x3 */ + + uECC_vli_modSub(t7, X1, X2, curve->p, num_words); /* t7 = B - x3 */ + uECC_vli_modMult_fast(Y2, Y2, t7, curve); /* t4 = (y2 - y1)*(B - x3) */ + uECC_vli_modSub(Y2, Y2, Y1, curve->p, + num_words); /* t4 = (y2 - y1)*(B - x3) - E = y3 */ + + uECC_vli_modSquare_fast(t7, t5, curve); /* t7 = (y2 + y1)^2 = F */ + uECC_vli_modSub(t7, t7, t6, curve->p, num_words); /* t7 = F - (B + C) = x3' */ + uECC_vli_modSub(t6, t7, X1, curve->p, num_words); /* t6 = x3' - B */ + uECC_vli_modMult_fast(t6, t6, t5, curve); /* t6 = (y2+y1)*(x3' - B) */ + uECC_vli_modSub(Y1, t6, Y1, curve->p, + num_words); /* t2 = (y2+y1)*(x3' - B) - E = y3' */ + + uECC_vli_set(X1, t7, num_words); +} + +/* result may overlap point. */ +static void EccPoint_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, + const uECC_word_t *initial_Z, bitcount_t num_bits, + uECC_Curve curve) { + /* R0 and R1 */ + uECC_word_t Rx[2][uECC_MAX_WORDS]; + uECC_word_t Ry[2][uECC_MAX_WORDS]; + uECC_word_t z[uECC_MAX_WORDS]; + bitcount_t i; + uECC_word_t nb; + wordcount_t num_words = curve->num_words; + + uECC_vli_set(Rx[1], point, num_words); + uECC_vli_set(Ry[1], point + num_words, num_words); + + XYcZ_initial_double(Rx[1], Ry[1], Rx[0], Ry[0], initial_Z, curve); + + for (i = num_bits - 2; i > 0; --i) { + nb = !uECC_vli_testBit(scalar, i); + XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve); + XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve); + } + + nb = !uECC_vli_testBit(scalar, 0); + XYcZ_addC(Rx[1 - nb], Ry[1 - nb], Rx[nb], Ry[nb], curve); + + /* Find final 1/Z value. 
*/ + uECC_vli_modSub(z, Rx[1], Rx[0], curve->p, num_words); /* X1 - X0 */ + uECC_vli_modMult_fast(z, z, Ry[1 - nb], curve); /* Yb * (X1 - X0) */ + uECC_vli_modMult_fast(z, z, point, curve); /* xP * Yb * (X1 - X0) */ + uECC_vli_modInv(z, z, curve->p, num_words); /* 1 / (xP * Yb * (X1 - X0)) */ + /* yP / (xP * Yb * (X1 - X0)) */ + uECC_vli_modMult_fast(z, z, point + num_words, curve); + uECC_vli_modMult_fast(z, z, Rx[1 - nb], + curve); /* Xb * yP / (xP * Yb * (X1 - X0)) */ + /* End 1/Z calculation */ + + XYcZ_add(Rx[nb], Ry[nb], Rx[1 - nb], Ry[1 - nb], curve); + apply_z(Rx[0], Ry[0], z, curve); + + uECC_vli_set(result, Rx[0], num_words); + uECC_vli_set(result + num_words, Ry[0], num_words); +} + +static uECC_word_t regularize_k(const uECC_word_t *const k, uECC_word_t *k0, + uECC_word_t *k1, uECC_Curve curve) { + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t num_n_bits = curve->num_n_bits; + uECC_word_t carry = + uECC_vli_add(k0, k, curve->n, num_n_words) || + (num_n_bits < ((bitcount_t) num_n_words * uECC_WORD_SIZE * 8) && + uECC_vli_testBit(k0, num_n_bits)); + uECC_vli_add(k1, k0, curve->n, num_n_words); + return carry; +} + +/* Generates a random integer in the range 0 < random < top. + Both random and top have num_words words. 
*/ +uECC_VLI_API int uECC_generate_random_int(uECC_word_t *random, + const uECC_word_t *top, + wordcount_t num_words) { + uECC_word_t mask = (uECC_word_t) -1; + uECC_word_t tries; + bitcount_t num_bits = uECC_vli_numBits(top, num_words); + + if (!g_rng_function) { + return 0; + } + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!g_rng_function((uint8_t *) random, + (unsigned int) (num_words * uECC_WORD_SIZE))) { + return 0; + } + random[num_words - 1] &= + mask >> ((bitcount_t) (num_words * uECC_WORD_SIZE * 8 - num_bits)); + if (!uECC_vli_isZero(random, num_words) && + uECC_vli_cmp(top, random, num_words) == 1) { + return 1; + } + } + return 0; +} + +static uECC_word_t EccPoint_compute_public_key(uECC_word_t *result, + uECC_word_t *private_key, + uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {tmp1, tmp2}; + uECC_word_t *initial_Z = 0; + uECC_word_t carry; + + /* Regularize the bitcount for the private key so that attackers cannot use a + side channel attack to learn the number of leading zeros. */ + carry = regularize_k(private_key, tmp1, tmp2, curve); + + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. 
*/ + if (g_rng_function) { + if (!uECC_generate_random_int(p2[carry], curve->p, curve->num_words)) { + return 0; + } + initial_Z = p2[carry]; + } + EccPoint_mult(result, curve->G, p2[!carry], initial_Z, + (bitcount_t) (curve->num_n_bits + 1), curve); + + if (EccPoint_isZero(result, curve)) { + return 0; + } + return 1; +} + +#if uECC_WORD_SIZE == 1 + +uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uint8_t *native) { + wordcount_t i; + for (i = 0; i < num_bytes; ++i) { + bytes[i] = native[(num_bytes - 1) - i]; + } +} + +uECC_VLI_API void uECC_vli_bytesToNative(uint8_t *native, const uint8_t *bytes, + int num_bytes) { + uECC_vli_nativeToBytes(native, num_bytes, bytes); +} + +#else + +uECC_VLI_API void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uECC_word_t *native) { + int i; + for (i = 0; i < num_bytes; ++i) { + unsigned b = (unsigned) (num_bytes - 1 - i); + bytes[i] = + (uint8_t) (native[b / uECC_WORD_SIZE] >> (8 * (b % uECC_WORD_SIZE))); + } +} + +uECC_VLI_API void uECC_vli_bytesToNative(uECC_word_t *native, + const uint8_t *bytes, int num_bytes) { + int i; + uECC_vli_clear(native, (wordcount_t) ((num_bytes + (uECC_WORD_SIZE - 1)) / + uECC_WORD_SIZE)); + for (i = 0; i < num_bytes; ++i) { + unsigned b = (unsigned) (num_bytes - 1 - i); + native[b / uECC_WORD_SIZE] |= (uECC_word_t) bytes[i] + << (8 * (b % uECC_WORD_SIZE)); + } +} + +#endif /* uECC_WORD_SIZE */ + +int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_private = (uECC_word_t *) private_key; + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _private[uECC_MAX_WORDS]; + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t tries; + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!uECC_generate_random_int(_private, curve->n, + BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (EccPoint_compute_public_key(_public, _private, 
curve)) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(private_key, BITS_TO_BYTES(curve->num_n_bits), + _private); + uECC_vli_nativeToBytes(public_key, curve->num_bytes, _public); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, + _public + curve->num_words); +#endif + return 1; + } + } + return 0; +} + +int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, + uint8_t *secret, uECC_Curve curve) { + uECC_word_t _public[uECC_MAX_WORDS * 2]; + uECC_word_t _private[uECC_MAX_WORDS]; + + uECC_word_t tmp[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {_private, tmp}; + uECC_word_t *initial_Z = 0; + uECC_word_t carry; + wordcount_t num_words = curve->num_words; + wordcount_t num_bytes = curve->num_bytes; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) _private, private_key, num_bytes); + bcopy((uint8_t *) _public, public_key, num_bytes * 2); +#else + uECC_vli_bytesToNative(_private, private_key, + BITS_TO_BYTES(curve->num_n_bits)); + uECC_vli_bytesToNative(_public, public_key, num_bytes); + uECC_vli_bytesToNative(_public + num_words, public_key + num_bytes, + num_bytes); +#endif + + /* Regularize the bitcount for the private key so that attackers cannot use a + side channel attack to learn the number of leading zeros. */ + carry = regularize_k(_private, _private, tmp, curve); + + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. 
*/ + if (g_rng_function) { + if (!uECC_generate_random_int(p2[carry], curve->p, num_words)) { + return 0; + } + initial_Z = p2[carry]; + } + + EccPoint_mult(_public, _public, p2[!carry], initial_Z, + (bitcount_t) (curve->num_n_bits + 1), curve); +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) secret, (uint8_t *) _public, num_bytes); +#else + uECC_vli_nativeToBytes(secret, num_bytes, _public); +#endif + return !EccPoint_isZero(_public, curve); +} + +#if uECC_SUPPORT_COMPRESSED_POINT +void uECC_compress(const uint8_t *public_key, uint8_t *compressed, + uECC_Curve curve) { + wordcount_t i; + for (i = 0; i < curve->num_bytes; ++i) { + compressed[i + 1] = public_key[i]; + } +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + compressed[0] = 2 + (public_key[curve->num_bytes] & 0x01); +#else + compressed[0] = 2 + (public_key[curve->num_bytes * 2 - 1] & 0x01); +#endif +} + +void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, + uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *point = (uECC_word_t *) public_key; +#else + uECC_word_t point[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t *y = point + curve->num_words; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy(public_key, compressed + 1, curve->num_bytes); +#else + uECC_vli_bytesToNative(point, compressed + 1, curve->num_bytes); +#endif + curve->x_side(y, point, curve); + curve->mod_sqrt(y, curve); + + if ((uint8_t) (y[0] & 0x01) != (compressed[0] & 0x01)) { + uECC_vli_sub(y, curve->p, y, curve->num_words); + } + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(public_key, curve->num_bytes, point); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, y); +#endif +} +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +uECC_VLI_API int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + + /* The point at infinity is invalid. 
*/ + if (EccPoint_isZero(point, curve)) { + return 0; + } + + /* x and y must be smaller than p. */ + if (uECC_vli_cmp_unsafe(curve->p, point, num_words) != 1 || + uECC_vli_cmp_unsafe(curve->p, point + num_words, num_words) != 1) { + return 0; + } + + uECC_vli_modSquare_fast(tmp1, point + num_words, curve); + curve->x_side(tmp2, point, curve); /* tmp2 = x^3 + ax + b */ + + /* Make sure that y^2 == x^3 + ax + b */ + return (int) (uECC_vli_equal(tmp1, tmp2, num_words)); +} + +int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_bytesToNative(_public, public_key, curve->num_bytes); + uECC_vli_bytesToNative(_public + curve->num_words, + public_key + curve->num_bytes, curve->num_bytes); +#endif + return uECC_valid_point(_public, curve); +} + +int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, + uECC_Curve curve) { +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_private = (uECC_word_t *) private_key; + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _private[uECC_MAX_WORDS]; + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_bytesToNative(_private, private_key, + BITS_TO_BYTES(curve->num_n_bits)); +#endif + + /* Make sure the private key is in the range [1, n-1]. */ + if (uECC_vli_isZero(_private, BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (uECC_vli_cmp(curve->n, _private, BITS_TO_WORDS(curve->num_n_bits)) != 1) { + return 0; + } + + /* Compute public key. 
*/ + if (!EccPoint_compute_public_key(_public, _private, curve)) { + return 0; + } + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(public_key, curve->num_bytes, _public); + uECC_vli_nativeToBytes(public_key + curve->num_bytes, curve->num_bytes, + _public + curve->num_words); +#endif + return 1; +} + +/* -------- ECDSA code -------- */ + +static void bits2int(uECC_word_t *native, const uint8_t *bits, + unsigned bits_size, uECC_Curve curve) { + unsigned num_n_bytes = (unsigned) BITS_TO_BYTES(curve->num_n_bits); + unsigned num_n_words = (unsigned) BITS_TO_WORDS(curve->num_n_bits); + int shift; + uECC_word_t carry; + uECC_word_t *ptr; + + if (bits_size > num_n_bytes) { + bits_size = num_n_bytes; + } + + uECC_vli_clear(native, (wordcount_t) num_n_words); +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) native, bits, bits_size); +#else + uECC_vli_bytesToNative(native, bits, (int) bits_size); +#endif + if (bits_size * 8 <= (unsigned) curve->num_n_bits) { + return; + } + shift = (int) bits_size * 8 - curve->num_n_bits; + carry = 0; + ptr = native + num_n_words; + while (ptr-- > native) { + uECC_word_t temp = *ptr; + *ptr = (temp >> shift) | carry; + carry = temp << (uECC_WORD_BITS - shift); + } + + /* Reduce mod curve_n */ + if (uECC_vli_cmp_unsafe(curve->n, native, (wordcount_t) num_n_words) != 1) { + uECC_vli_sub(native, native, curve->n, (wordcount_t) num_n_words); + } +} + +static int uECC_sign_with_k_internal(const uint8_t *private_key, + const uint8_t *message_hash, + unsigned hash_size, uECC_word_t *k, + uint8_t *signature, uECC_Curve curve) { + uECC_word_t tmp[uECC_MAX_WORDS]; + uECC_word_t s[uECC_MAX_WORDS]; + uECC_word_t *k2[2] = {tmp, s}; + uECC_word_t *initial_Z = 0; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *p = (uECC_word_t *) signature; +#else + uECC_word_t p[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t carry; + wordcount_t num_words = curve->num_words; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t 
num_n_bits = curve->num_n_bits; + + /* Make sure 0 < k < curve_n */ + if (uECC_vli_isZero(k, num_words) || + uECC_vli_cmp(curve->n, k, num_n_words) != 1) { + return 0; + } + + carry = regularize_k(k, tmp, s, curve); + /* If an RNG function was specified, try to get a random initial Z value to + improve protection against side-channel attacks. */ + if (g_rng_function) { + if (!uECC_generate_random_int(k2[carry], curve->p, num_words)) { + return 0; + } + initial_Z = k2[carry]; + } + EccPoint_mult(p, curve->G, k2[!carry], initial_Z, + (bitcount_t) (num_n_bits + 1), curve); + if (uECC_vli_isZero(p, num_words)) { + return 0; + } + + /* If an RNG function was specified, get a random number + to prevent side channel analysis of k. */ + if (!g_rng_function) { + uECC_vli_clear(tmp, num_n_words); + tmp[0] = 1; + } else if (!uECC_generate_random_int(tmp, curve->n, num_n_words)) { + return 0; + } + + /* Prevent side channel analysis of uECC_vli_modInv() to determine + bits of k / the private key by premultiplying by a random number */ + uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k' = rand * k */ + uECC_vli_modInv(k, k, curve->n, num_n_words); /* k = 1 / k' */ + uECC_vli_modMult(k, k, tmp, curve->n, num_n_words); /* k = 1 / k */ + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN == 0 + uECC_vli_nativeToBytes(signature, curve->num_bytes, p); /* store r */ +#endif + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) tmp, private_key, BITS_TO_BYTES(curve->num_n_bits)); +#else + uECC_vli_bytesToNative(tmp, private_key, + BITS_TO_BYTES(curve->num_n_bits)); /* tmp = d */ +#endif + + s[num_n_words - 1] = 0; + uECC_vli_set(s, p, num_words); + uECC_vli_modMult(s, tmp, s, curve->n, num_n_words); /* s = r*d */ + + bits2int(tmp, message_hash, hash_size, curve); + uECC_vli_modAdd(s, tmp, s, curve->n, num_n_words); /* s = e + r*d */ + uECC_vli_modMult(s, s, k, curve->n, num_n_words); /* s = (e + r*d) / k */ + if (uECC_vli_numBits(s, num_n_words) > (bitcount_t) curve->num_bytes * 8) { + 
return 0; + } +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) signature + curve->num_bytes, (uint8_t *) s, + curve->num_bytes); +#else + uECC_vli_nativeToBytes(signature + curve->num_bytes, curve->num_bytes, s); +#endif + return 1; +} + +#if 0 +/* For testing - sign with an explicitly specified k value */ +int uECC_sign_with_k(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *k, uint8_t *signature, + uECC_Curve curve) { + uECC_word_t k2[uECC_MAX_WORDS]; + bits2int(k2, k, (unsigned) BITS_TO_BYTES(curve->num_n_bits), curve); + return uECC_sign_with_k_internal(private_key, message_hash, hash_size, k2, + signature, curve); +} +#endif + +int uECC_sign(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, uint8_t *signature, uECC_Curve curve) { + uECC_word_t k[uECC_MAX_WORDS]; + uECC_word_t tries; + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + if (!uECC_generate_random_int(k, curve->n, + BITS_TO_WORDS(curve->num_n_bits))) { + return 0; + } + + if (uECC_sign_with_k_internal(private_key, message_hash, hash_size, k, + signature, curve)) { + return 1; + } + } + return 0; +} + +/* Compute an HMAC using K as a key (as in RFC 6979). Note that K is always + the same size as the hash result size. 
*/ +static void HMAC_init(const uECC_HashContext *hash_context, const uint8_t *K) { + uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x36; + for (; i < hash_context->block_size; ++i) pad[i] = 0x36; + + hash_context->init_hash(hash_context); + hash_context->update_hash(hash_context, pad, hash_context->block_size); +} + +static void HMAC_update(const uECC_HashContext *hash_context, + const uint8_t *message, unsigned message_size) { + hash_context->update_hash(hash_context, message, message_size); +} + +static void HMAC_finish(const uECC_HashContext *hash_context, const uint8_t *K, + uint8_t *result) { + uint8_t *pad = hash_context->tmp + 2 * hash_context->result_size; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) pad[i] = K[i] ^ 0x5c; + for (; i < hash_context->block_size; ++i) pad[i] = 0x5c; + + hash_context->finish_hash(hash_context, result); + + hash_context->init_hash(hash_context); + hash_context->update_hash(hash_context, pad, hash_context->block_size); + hash_context->update_hash(hash_context, result, hash_context->result_size); + hash_context->finish_hash(hash_context, result); +} + +/* V = HMAC_K(V) */ +static void update_V(const uECC_HashContext *hash_context, uint8_t *K, + uint8_t *V) { + HMAC_init(hash_context, K); + HMAC_update(hash_context, V, hash_context->result_size); + HMAC_finish(hash_context, K, V); +} + +/* Deterministic signing, similar to RFC 6979. Differences are: + * We just use H(m) directly rather than bits2octets(H(m)) + (it is not reduced modulo curve_n). + * We generate a value for k (aka T) directly rather than converting + endianness. 
+ + Layout of hash_context->tmp: K (result_size bytes) | V (result_size bytes) | (1 byte overlapped 0x00 or 0x01) / HMAC pad (block_size bytes) + */ +int uECC_sign_deterministic(const uint8_t *private_key, + const uint8_t *message_hash, unsigned hash_size, + const uECC_HashContext *hash_context, + uint8_t *signature, uECC_Curve curve) { + uint8_t *K = hash_context->tmp; + uint8_t *V = K + hash_context->result_size; + wordcount_t num_bytes = curve->num_bytes; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + bitcount_t num_n_bits = curve->num_n_bits; + uECC_word_t tries; + unsigned i; + for (i = 0; i < hash_context->result_size; ++i) { + V[i] = 0x01; + K[i] = 0; + } + + /* K = HMAC_K(V || 0x00 || int2octets(x) || h(m)) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x00; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_update(hash_context, private_key, (unsigned int) num_bytes); + HMAC_update(hash_context, message_hash, hash_size); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + + /* K = HMAC_K(V || 0x01 || int2octets(x) || h(m)) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x01; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_update(hash_context, private_key, (unsigned int) num_bytes); + HMAC_update(hash_context, message_hash, hash_size); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + + for (tries = 0; tries < uECC_RNG_MAX_TRIES; ++tries) { + uECC_word_t T[uECC_MAX_WORDS]; + uint8_t *T_ptr = (uint8_t *) T; + wordcount_t T_bytes = 0; + for (;;) { + update_V(hash_context, K, V); + for (i = 0; i < hash_context->result_size; ++i) { + T_ptr[T_bytes++] = V[i]; + if (T_bytes >= num_n_words * uECC_WORD_SIZE) { + goto filled; + } + } + } + filled: + if ((bitcount_t) num_n_words * uECC_WORD_SIZE * 8 > num_n_bits) { + uECC_word_t mask = (uECC_word_t) -1; + T[num_n_words - 1] &= + mask >> + ((bitcount_t) (num_n_words * uECC_WORD_SIZE * 8 - num_n_bits)); + } + + if (uECC_sign_with_k_internal(private_key, 
message_hash, hash_size, T, + signature, curve)) { + return 1; + } + + /* K = HMAC_K(V || 0x00) */ + HMAC_init(hash_context, K); + V[hash_context->result_size] = 0x00; + HMAC_update(hash_context, V, hash_context->result_size + 1); + HMAC_finish(hash_context, K, K); + + update_V(hash_context, K, V); + } + return 0; +} + +static bitcount_t smax(bitcount_t a, bitcount_t b) { + return (a > b ? a : b); +} + +int uECC_verify(const uint8_t *public_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *signature, + uECC_Curve curve) { + uECC_word_t u1[uECC_MAX_WORDS], u2[uECC_MAX_WORDS]; + uECC_word_t z[uECC_MAX_WORDS]; + uECC_word_t sum[uECC_MAX_WORDS * 2]; + uECC_word_t rx[uECC_MAX_WORDS]; + uECC_word_t ry[uECC_MAX_WORDS]; + uECC_word_t tx[uECC_MAX_WORDS]; + uECC_word_t ty[uECC_MAX_WORDS]; + uECC_word_t tz[uECC_MAX_WORDS]; + const uECC_word_t *points[4]; + const uECC_word_t *point; + bitcount_t num_bits; + bitcount_t i; +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + uECC_word_t *_public = (uECC_word_t *) public_key; +#else + uECC_word_t _public[uECC_MAX_WORDS * 2]; +#endif + uECC_word_t r[uECC_MAX_WORDS], s[uECC_MAX_WORDS]; + wordcount_t num_words = curve->num_words; + wordcount_t num_n_words = BITS_TO_WORDS(curve->num_n_bits); + + rx[num_n_words - 1] = 0; + r[num_n_words - 1] = 0; + s[num_n_words - 1] = 0; + +#if uECC_VLI_NATIVE_LITTLE_ENDIAN + bcopy((uint8_t *) r, signature, curve->num_bytes); + bcopy((uint8_t *) s, signature + curve->num_bytes, curve->num_bytes); +#else + uECC_vli_bytesToNative(_public, public_key, curve->num_bytes); + uECC_vli_bytesToNative(_public + num_words, public_key + curve->num_bytes, + curve->num_bytes); + uECC_vli_bytesToNative(r, signature, curve->num_bytes); + uECC_vli_bytesToNative(s, signature + curve->num_bytes, curve->num_bytes); +#endif + + /* r, s must not be 0. */ + if (uECC_vli_isZero(r, num_words) || uECC_vli_isZero(s, num_words)) { + return 0; + } + + /* r, s must be < n. 
*/ + if (uECC_vli_cmp_unsafe(curve->n, r, num_n_words) != 1 || + uECC_vli_cmp_unsafe(curve->n, s, num_n_words) != 1) { + return 0; + } + + /* Calculate u1 and u2. */ + uECC_vli_modInv(z, s, curve->n, num_n_words); /* z = 1/s */ + u1[num_n_words - 1] = 0; + bits2int(u1, message_hash, hash_size, curve); + uECC_vli_modMult(u1, u1, z, curve->n, num_n_words); /* u1 = e/s */ + uECC_vli_modMult(u2, r, z, curve->n, num_n_words); /* u2 = r/s */ + + /* Calculate sum = G + Q. */ + uECC_vli_set(sum, _public, num_words); + uECC_vli_set(sum + num_words, _public + num_words, num_words); + uECC_vli_set(tx, curve->G, num_words); + uECC_vli_set(ty, curve->G + num_words, num_words); + uECC_vli_modSub(z, sum, tx, curve->p, num_words); /* z = x2 - x1 */ + XYcZ_add(tx, ty, sum, sum + num_words, curve); + uECC_vli_modInv(z, z, curve->p, num_words); /* z = 1/z */ + apply_z(sum, sum + num_words, z, curve); + + /* Use Shamir's trick to calculate u1*G + u2*Q */ + points[0] = 0; + points[1] = curve->G; + points[2] = _public; + points[3] = sum; + num_bits = smax(uECC_vli_numBits(u1, num_n_words), + uECC_vli_numBits(u2, num_n_words)); + point = points[(!!uECC_vli_testBit(u1, (bitcount_t) (num_bits - 1))) | + ((!!uECC_vli_testBit(u2, (bitcount_t) (num_bits - 1))) << 1)]; + uECC_vli_set(rx, point, num_words); + uECC_vli_set(ry, point + num_words, num_words); + uECC_vli_clear(z, num_words); + z[0] = 1; + + for (i = num_bits - 2; i >= 0; --i) { + uECC_word_t index; + curve->double_jacobian(rx, ry, z, curve); + + index = (!!uECC_vli_testBit(u1, i)) | + (uECC_word_t) ((!!uECC_vli_testBit(u2, i)) << 1); + point = points[index]; + if (point) { + uECC_vli_set(tx, point, num_words); + uECC_vli_set(ty, point + num_words, num_words); + apply_z(tx, ty, z, curve); + uECC_vli_modSub(tz, rx, tx, curve->p, num_words); /* Z = x2 - x1 */ + XYcZ_add(tx, ty, rx, ry, curve); + uECC_vli_modMult_fast(z, z, tz, curve); + } + } + + uECC_vli_modInv(z, z, curve->p, num_words); /* Z = 1/Z */ + apply_z(rx, ry, z, curve); + 
+ /* v = x1 (mod n) */ + if (uECC_vli_cmp_unsafe(curve->n, rx, num_n_words) != 1) { + uECC_vli_sub(rx, rx, curve->n, num_n_words); + } + + /* Accept only if v == r. */ + return (int) (uECC_vli_equal(rx, r, num_words)); +} + +#if uECC_ENABLE_VLI_API +/* Read-only accessors for uECC_Curve fields; counts are cast to unsigned explicitly so no implicit sign conversion takes place. */ +unsigned uECC_curve_num_words(uECC_Curve curve) { + return (unsigned) curve->num_words; +} + +unsigned uECC_curve_num_bytes(uECC_Curve curve) { + return (unsigned) curve->num_bytes; +} +/* Bit size is derived from the byte size (num_bytes * 8). */ +unsigned uECC_curve_num_bits(uECC_Curve curve) { + return (unsigned) (curve->num_bytes * 8); +} +/* The three num_n_* accessors are all derived from curve->num_n_bits. */ +unsigned uECC_curve_num_n_words(uECC_Curve curve) { + return (unsigned) (BITS_TO_WORDS(curve->num_n_bits)); +} + +unsigned uECC_curve_num_n_bytes(uECC_Curve curve) { + return (unsigned) (BITS_TO_BYTES(curve->num_n_bits)); +} + +unsigned uECC_curve_num_n_bits(uECC_Curve curve) { + return (unsigned) curve->num_n_bits; +} +/* Pointers to the curve's constant word arrays p, n, G and b. */ +const uECC_word_t *uECC_curve_p(uECC_Curve curve) { + return curve->p; +} + +const uECC_word_t *uECC_curve_n(uECC_Curve curve) { + return curve->n; +} + +const uECC_word_t *uECC_curve_G(uECC_Curve curve) { + return curve->G; +} + +const uECC_word_t *uECC_curve_b(uECC_Curve curve) { + return curve->b; +} +/* Computes a = sqrt(a) in place by delegating to the curve's mod_sqrt callback. */ +#if uECC_SUPPORT_COMPRESSED_POINT +void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve) { + curve->mod_sqrt(a, curve); +} +#endif +/* Reduces product into result: uses the curve's mmod_fast callback when uECC_OPTIMIZATION_LEVEL > 0, otherwise uECC_vli_mmod() with curve->p. */ +void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, + uECC_Curve curve) { +#if (uECC_OPTIMIZATION_LEVEL > 0) + curve->mmod_fast(result, product); +#else + uECC_vli_mmod(result, product, curve->p, curve->num_words); +#endif +} +/* Computes result = scalar * point. regularize_k() fills tmp1/tmp2 and its carry picks which of the two is handed to EccPoint_mult, together with a bit count of num_n_bits + 1; the fourth argument is passed as 0. */ +void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, uECC_Curve curve) { + uECC_word_t tmp1[uECC_MAX_WORDS]; + uECC_word_t tmp2[uECC_MAX_WORDS]; + uECC_word_t *p2[2] = {tmp1, tmp2}; + uECC_word_t carry = regularize_k(scalar, tmp1, tmp2, curve); + + EccPoint_mult(result, point, p2[!carry], 0, (bitcount_t) (curve->num_n_bits + 1), curve); +} + +#endif /* uECC_ENABLE_VLI_API */ +#endif // MG_TLS_BUILTIN diff --git a/src/tls_uecc.h b/src/tls_uecc.h new file mode 100644 index
00000000..6b56de18 --- /dev/null +++ b/src/tls_uecc.h @@ -0,0 +1,638 @@ +#pragma once +#include "arch.h" + +#define uECC_SUPPORTS_secp256r1 1 +/* Copyright 2014, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#ifndef _UECC_H_ +#define _UECC_H_ + +/* Platform selection options. +If uECC_PLATFORM is not defined, the code will try to guess it based on compiler +macros. Possible values for uECC_PLATFORM are defined below: */ +#define uECC_arch_other 0 +#define uECC_x86 1 +#define uECC_x86_64 2 +#define uECC_arm 3 +#define uECC_arm_thumb 4 +#define uECC_arm_thumb2 5 +#define uECC_arm64 6 +#define uECC_avr 7 + +/* If desired, you can define uECC_WORD_SIZE as appropriate for your platform +(1, 4, or 8 bytes). If uECC_WORD_SIZE is not explicitly defined then it will be +automatically set based on your platform. */ + +/* Optimization level; trade speed for code size. + Larger values produce code that is faster but larger. + Currently supported values are 0 - 4; 0 is unusably slow for most + applications. Optimization level 4 currently only has an effect on ARM platforms + where more than one curve is enabled. */ +#ifndef uECC_OPTIMIZATION_LEVEL +#define uECC_OPTIMIZATION_LEVEL 2 +#endif + +/* uECC_SQUARE_FUNC - If enabled (defined as nonzero), this will cause a +specific function to be used for (scalar) squaring instead of the generic +multiplication function. This can make things somewhat faster, but +increases the code size. */ +#ifndef uECC_SQUARE_FUNC +#define uECC_SQUARE_FUNC 0 +#endif + +/* uECC_VLI_NATIVE_LITTLE_ENDIAN - If enabled (defined as nonzero), this will +switch to native little-endian format for *all* arrays passed in and out of the +public API. This includes public and private keys, shared secrets, signatures +and message hashes. Using this switch reduces the amount of call stack memory +used by uECC, since less intermediate translations are required.
Note that this +will *only* work on native little-endian processors and it will treat the +uint8_t arrays passed into the public API as word arrays, therefore requiring +the provided byte arrays to be word aligned on architectures that do not support +unaligned accesses. IMPORTANT: Keys and signatures generated with +uECC_VLI_NATIVE_LITTLE_ENDIAN=1 are incompatible with keys and signatures +generated with uECC_VLI_NATIVE_LITTLE_ENDIAN=0; all parties must use the same +endianness. */ +#ifndef uECC_VLI_NATIVE_LITTLE_ENDIAN +#define uECC_VLI_NATIVE_LITTLE_ENDIAN 0 +#endif + +/* Curve support selection. Set to 0 to remove that curve. */ +#ifndef uECC_SUPPORTS_secp160r1 +#define uECC_SUPPORTS_secp160r1 0 +#endif +#ifndef uECC_SUPPORTS_secp192r1 +#define uECC_SUPPORTS_secp192r1 0 +#endif +#ifndef uECC_SUPPORTS_secp224r1 +#define uECC_SUPPORTS_secp224r1 0 +#endif +#ifndef uECC_SUPPORTS_secp256r1 +#define uECC_SUPPORTS_secp256r1 1 +#endif +#ifndef uECC_SUPPORTS_secp256k1 +#define uECC_SUPPORTS_secp256k1 0 +#endif + +/* Specifies whether compressed point format is supported. + Set to 0 to disable point compression/decompression functions. */ +#ifndef uECC_SUPPORT_COMPRESSED_POINT +#define uECC_SUPPORT_COMPRESSED_POINT 1 +#endif + +struct uECC_Curve_t; +typedef const struct uECC_Curve_t *uECC_Curve; + +#ifdef __cplusplus +extern "C" { +#endif + +#if uECC_SUPPORTS_secp160r1 +uECC_Curve uECC_secp160r1(void); +#endif +#if uECC_SUPPORTS_secp192r1 +uECC_Curve uECC_secp192r1(void); +#endif +#if uECC_SUPPORTS_secp224r1 +uECC_Curve uECC_secp224r1(void); +#endif +#if uECC_SUPPORTS_secp256r1 +uECC_Curve uECC_secp256r1(void); +#endif +#if uECC_SUPPORTS_secp256k1 +uECC_Curve uECC_secp256k1(void); +#endif + +/* uECC_RNG_Function type +The RNG function should fill 'size' random bytes into 'dest'. It should return 1 +if 'dest' was filled with random data, or 0 if the random data could not be +generated. 
The filled-in values should be either truly random, or from a +cryptographically-secure PRNG. + +A correctly functioning RNG function must be set (using uECC_set_rng()) before +calling uECC_make_key() or uECC_sign(). + +Setting a correctly functioning RNG function improves the resistance to +side-channel attacks for uECC_shared_secret() and uECC_sign_deterministic(). + +A correct RNG function is set by default when building for Windows, Linux, or OS +X. If you are building on another POSIX-compliant system that supports +/dev/random or /dev/urandom, you can define uECC_POSIX to use the predefined +RNG. For embedded platforms there is no predefined RNG function; you must +provide your own. +*/ +typedef int (*uECC_RNG_Function)(uint8_t *dest, unsigned size); + +/* uECC_set_rng() function. +Set the function that will be used to generate random bytes. The RNG function +should return 1 if the random data was generated, or 0 if the random data could +not be generated. + +On platforms where there is no predefined RNG function (eg embedded platforms), +this must be called before uECC_make_key() or uECC_sign() are used. + +Inputs: + rng_function - The function that will be used to generate random bytes. +*/ +void uECC_set_rng(uECC_RNG_Function rng_function); + +/* uECC_get_rng() function. + +Returns the function that will be used to generate random bytes. +*/ +uECC_RNG_Function uECC_get_rng(void); + +/* uECC_curve_private_key_size() function. + +Returns the size of a private key for the curve in bytes. +*/ +int uECC_curve_private_key_size(uECC_Curve curve); + +/* uECC_curve_public_key_size() function. + +Returns the size of a public key for the curve in bytes. +*/ +int uECC_curve_public_key_size(uECC_Curve curve); + +/* uECC_make_key() function. +Create a public/private key pair. + +Outputs: + public_key - Will be filled in with the public key. Must be at least 2 * +the curve size (in bytes) long. For example, if the curve is secp256r1, +public_key must be 64 bytes long. 
private_key - Will be filled in with the +private key. Must be as long as the curve order; this is typically the same as +the curve size, except for secp160r1. For example, if the curve is secp256r1, +private_key must be 32 bytes long. + + For secp160r1, private_key must be 21 bytes long! Note that +the first byte will almost always be 0 (there is about a 1 in 2^80 chance of it +being non-zero). + +Returns 1 if the key pair was generated successfully, 0 if an error occurred. +*/ +int uECC_make_key(uint8_t *public_key, uint8_t *private_key, uECC_Curve curve); + +/* uECC_shared_secret() function. +Compute a shared secret given your secret key and someone else's public key. If +the public key is not from a trusted source and has not been previously +verified, you should verify it first using uECC_valid_public_key(). Note: It is +recommended that you hash the result of uECC_shared_secret() before using it for +symmetric encryption or HMAC. + +Inputs: + public_key - The public key of the remote party. + private_key - Your private key. + +Outputs: + secret - Will be filled in with the shared secret value. Must be the same +size as the curve size; for example, if the curve is secp256r1, secret must be +32 bytes long. + +Returns 1 if the shared secret was generated successfully, 0 if an error +occurred. +*/ +int uECC_shared_secret(const uint8_t *public_key, const uint8_t *private_key, + uint8_t *secret, uECC_Curve curve); + +#if uECC_SUPPORT_COMPRESSED_POINT +/* uECC_compress() function. +Compress a public key. + +Inputs: + public_key - The public key to compress. + +Outputs: + compressed - Will be filled in with the compressed public key. Must be at +least (curve size + 1) bytes long; for example, if the curve is secp256r1, + compressed must be 33 bytes long. +*/ +void uECC_compress(const uint8_t *public_key, uint8_t *compressed, + uECC_Curve curve); + +/* uECC_decompress() function. +Decompress a compressed public key. + +Inputs: + compressed - The compressed public key. 
+ +Outputs: + public_key - Will be filled in with the decompressed public key. +*/ +void uECC_decompress(const uint8_t *compressed, uint8_t *public_key, + uECC_Curve curve); +#endif /* uECC_SUPPORT_COMPRESSED_POINT */ + +/* uECC_valid_public_key() function. +Check to see if a public key is valid. + +Note that you are not required to check for a valid public key before using any +other uECC functions. However, you may wish to avoid spending CPU time computing +a shared secret or verifying a signature using an invalid public key. + +Inputs: + public_key - The public key to check. + +Returns 1 if the public key is valid, 0 if it is invalid. +*/ +int uECC_valid_public_key(const uint8_t *public_key, uECC_Curve curve); + +/* uECC_compute_public_key() function. +Compute the corresponding public key for a private key. + +Inputs: + private_key - The private key to compute the public key for + +Outputs: + public_key - Will be filled in with the corresponding public key + +Returns 1 if the key was computed successfully, 0 if an error occurred. +*/ +int uECC_compute_public_key(const uint8_t *private_key, uint8_t *public_key, + uECC_Curve curve); + +/* uECC_sign() function. +Generate an ECDSA signature for a given hash value. + +Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and +pass it in to this function along with your private key. + +Inputs: + private_key - Your private key. + message_hash - The hash of the message to sign. + hash_size - The size of message_hash in bytes. + +Outputs: + signature - Will be filled in with the signature value. Must be at least 2 * +curve size long. For example, if the curve is secp256r1, signature must be 64 +bytes long. + +Returns 1 if the signature generated successfully, 0 if an error occurred. +*/ +int uECC_sign(const uint8_t *private_key, const uint8_t *message_hash, + unsigned hash_size, uint8_t *signature, uECC_Curve curve); + +/* uECC_HashContext structure. 
+This is used to pass in an arbitrary hash function to uECC_sign_deterministic(). +The structure will be used for multiple hash computations; each time a new hash +is computed, init_hash() will be called, followed by one or more calls to +update_hash(), and finally a call to finish_hash() to produce the resulting +hash. + +The intention is that you will create a structure that includes uECC_HashContext +followed by any hash-specific data. For example: + +typedef struct SHA256_HashContext { + uECC_HashContext uECC; + SHA256_CTX ctx; +} SHA256_HashContext; + +void init_SHA256(const uECC_HashContext *base) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Init(&context->ctx); +} + +void update_SHA256(const uECC_HashContext *base, + const uint8_t *message, + unsigned message_size) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Update(&context->ctx, message, message_size); +} + +void finish_SHA256(const uECC_HashContext *base, uint8_t *hash_result) { + SHA256_HashContext *context = (SHA256_HashContext *)base; + SHA256_Final(hash_result, &context->ctx); +} + +... when signing ... +{ + uint8_t tmp[32 + 32 + 64]; + SHA256_HashContext ctx = {{&init_SHA256, &update_SHA256, &finish_SHA256, 64, 32, tmp}}; + uECC_sign_deterministic(key, message_hash, hash_size, &ctx.uECC, signature, curve); +} +*/ +typedef struct uECC_HashContext { + void (*init_hash)(const struct uECC_HashContext *context); + void (*update_hash)(const struct uECC_HashContext *context, + const uint8_t *message, unsigned message_size); + void (*finish_hash)(const struct uECC_HashContext *context, + uint8_t *hash_result); + unsigned + block_size; /* Hash function block size in bytes, eg 64 for SHA-256. */ + unsigned + result_size; /* Hash function result size in bytes, eg 32 for SHA-256. */ + uint8_t *tmp; /* Must point to a buffer of at least (2 * result_size + + block_size) bytes. */ +} uECC_HashContext; + +/* uECC_sign_deterministic() function.
+Generate an ECDSA signature for a given hash value, using a deterministic +algorithm (see RFC 6979). You do not need to set the RNG using uECC_set_rng() +before calling this function; however, if the RNG is defined it will improve +resistance to side-channel attacks. + +Usage: Compute a hash of the data you wish to sign (SHA-2 is recommended) and +pass it to this function along with your private key and a hash context. Note +that the message_hash does not need to be computed with the same hash function +used by hash_context. + +Inputs: + private_key - Your private key. + message_hash - The hash of the message to sign. + hash_size - The size of message_hash in bytes. + hash_context - A hash context to use. + +Outputs: + signature - Will be filled in with the signature value. + +Returns 1 if the signature generated successfully, 0 if an error occurred. +*/ +int uECC_sign_deterministic(const uint8_t *private_key, + const uint8_t *message_hash, unsigned hash_size, + const uECC_HashContext *hash_context, + uint8_t *signature, uECC_Curve curve); + +/* uECC_verify() function. +Verify an ECDSA signature. + +Usage: Compute the hash of the signed data using the same hash as the signer and +pass it to this function along with the signer's public key and the signature +values (r and s). + +Inputs: + public_key - The signer's public key. + message_hash - The hash of the signed data. + hash_size - The size of message_hash in bytes. + signature - The signature value. + +Returns 1 if the signature is valid, 0 if it is invalid. +*/ +int uECC_verify(const uint8_t *public_key, const uint8_t *message_hash, + unsigned hash_size, const uint8_t *signature, uECC_Curve curve); + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* _UECC_H_ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#ifndef _UECC_VLI_H_ +#define _UECC_VLI_H_ + +//#include "types.h" +//#include "uECC.h" + +/* Functions for raw large-integer manipulation. 
These are only available + if uECC.c is compiled with uECC_ENABLE_VLI_API defined to 1. */ +#ifndef uECC_ENABLE_VLI_API +#define uECC_ENABLE_VLI_API 0 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#if uECC_ENABLE_VLI_API + +void uECC_vli_clear(uECC_word_t *vli, wordcount_t num_words); + +/* Constant-time comparison to zero - secure way to compare long integers */ +/* Returns 1 if vli == 0, 0 otherwise. */ +uECC_word_t uECC_vli_isZero(const uECC_word_t *vli, wordcount_t num_words); + +/* Returns nonzero if bit 'bit' of vli is set. */ +uECC_word_t uECC_vli_testBit(const uECC_word_t *vli, bitcount_t bit); + +/* Counts the number of bits required to represent vli. */ +bitcount_t uECC_vli_numBits(const uECC_word_t *vli, + const wordcount_t max_words); + +/* Sets dest = src. */ +void uECC_vli_set(uECC_word_t *dest, const uECC_word_t *src, + wordcount_t num_words); + +/* Constant-time comparison function - secure way to compare long integers */ +/* Returns one if left == right, zero otherwise */ +uECC_word_t uECC_vli_equal(const uECC_word_t *left, const uECC_word_t *right, + wordcount_t num_words); + +/* Constant-time comparison function - secure way to compare long integers */ +/* Returns sign of left - right, in constant time. */ +cmpresult_t uECC_vli_cmp(const uECC_word_t *left, const uECC_word_t *right, + wordcount_t num_words); + +/* Computes vli = vli >> 1. */ +void uECC_vli_rshift1(uECC_word_t *vli, wordcount_t num_words); + +/* Computes result = left + right, returning carry. Can modify in place. */ +uECC_word_t uECC_vli_add(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left - right, returning borrow. Can modify in place. */ +uECC_word_t uECC_vli_sub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left * right. Result must be 2 * num_words long. 
*/ +void uECC_vli_mult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, wordcount_t num_words); + +/* Computes result = left^2. Result must be 2 * num_words long. */ +void uECC_vli_square(uECC_word_t *result, const uECC_word_t *left, + wordcount_t num_words); + +/* Computes result = (left + right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +void uECC_vli_modAdd(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = (left - right) % mod. + Assumes that left < mod and right < mod, and that result does not overlap + mod. */ +void uECC_vli_modSub(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = product % mod, where product is 2N words long. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_mmod(uECC_word_t *result, uECC_word_t *product, + const uECC_word_t *mod, wordcount_t num_words); + +/* Calculates result = product (mod curve->p), where product is up to + 2 * curve->num_words long. */ +void uECC_vli_mmod_fast(uECC_word_t *result, uECC_word_t *product, + uECC_Curve curve); + +/* Computes result = (left * right) % mod. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_modMult(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, const uECC_word_t *mod, + wordcount_t num_words); + +/* Computes result = (left * right) % curve->p. */ +void uECC_vli_modMult_fast(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *right, uECC_Curve curve); + +/* Computes result = left^2 % mod. + Currently only designed to work for mod == curve->p or curve_n. */ +void uECC_vli_modSquare(uECC_word_t *result, const uECC_word_t *left, + const uECC_word_t *mod, wordcount_t num_words); + +/* Computes result = left^2 % curve->p. 
*/ +void uECC_vli_modSquare_fast(uECC_word_t *result, const uECC_word_t *left, + uECC_Curve curve); + +/* Computes result = (1 / input) % mod.*/ +void uECC_vli_modInv(uECC_word_t *result, const uECC_word_t *input, + const uECC_word_t *mod, wordcount_t num_words); + +#if uECC_SUPPORT_COMPRESSED_POINT +/* Calculates a = sqrt(a) (mod curve->p) */ +void uECC_vli_mod_sqrt(uECC_word_t *a, uECC_Curve curve); +#endif + +/* Converts an integer in uECC native format to big-endian bytes. */ +void uECC_vli_nativeToBytes(uint8_t *bytes, int num_bytes, + const uECC_word_t *native); +/* Converts big-endian bytes to an integer in uECC native format. */ +void uECC_vli_bytesToNative(uECC_word_t *native, const uint8_t *bytes, + int num_bytes); + +unsigned uECC_curve_num_words(uECC_Curve curve); +unsigned uECC_curve_num_bytes(uECC_Curve curve); +unsigned uECC_curve_num_bits(uECC_Curve curve); +unsigned uECC_curve_num_n_words(uECC_Curve curve); +unsigned uECC_curve_num_n_bytes(uECC_Curve curve); +unsigned uECC_curve_num_n_bits(uECC_Curve curve); + +const uECC_word_t *uECC_curve_p(uECC_Curve curve); +const uECC_word_t *uECC_curve_n(uECC_Curve curve); +const uECC_word_t *uECC_curve_G(uECC_Curve curve); +const uECC_word_t *uECC_curve_b(uECC_Curve curve); + +int uECC_valid_point(const uECC_word_t *point, uECC_Curve curve); + +/* Multiplies a point by a scalar. Points are represented by the X coordinate + followed by the Y coordinate in the same array, both coordinates are + curve->num_words long. Note that scalar must be curve->num_n_words long (NOT + curve->num_words). */ +void uECC_point_mult(uECC_word_t *result, const uECC_word_t *point, + const uECC_word_t *scalar, uECC_Curve curve); + +/* Generates a random integer in the range 0 < random < top. + Both random and top have num_words words. 
*/ +int uECC_generate_random_int(uECC_word_t *random, const uECC_word_t *top, + wordcount_t num_words); + +#endif /* uECC_ENABLE_VLI_API */ + +#ifdef __cplusplus +} /* end of extern "C" */ +#endif + +#endif /* _UECC_VLI_H_ */ + +/* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */ + +#ifndef _UECC_TYPES_H_ +#define _UECC_TYPES_H_ + +#ifndef uECC_PLATFORM +#if defined(__AVR__) && __AVR__ +#define uECC_PLATFORM uECC_avr +#elif defined(__thumb2__) || \ + defined(_M_ARMT) /* I think MSVC only supports Thumb-2 targets */ +#define uECC_PLATFORM uECC_arm_thumb2 +#elif defined(__thumb__) +#define uECC_PLATFORM uECC_arm_thumb +#elif defined(__arm__) || defined(_M_ARM) +#define uECC_PLATFORM uECC_arm +#elif defined(__aarch64__) +#define uECC_PLATFORM uECC_arm64 +#elif defined(__i386__) || defined(_M_IX86) || defined(_X86_) || \ + defined(__I86__) +#define uECC_PLATFORM uECC_x86 +#elif defined(__amd64__) || defined(_M_X64) +#define uECC_PLATFORM uECC_x86_64 +#else +#define uECC_PLATFORM uECC_arch_other +#endif +#endif + +#ifndef uECC_ARM_USE_UMAAL +#if (uECC_PLATFORM == uECC_arm) && (__ARM_ARCH >= 6) +#define uECC_ARM_USE_UMAAL 1 +#elif (uECC_PLATFORM == uECC_arm_thumb2) && (__ARM_ARCH >= 6) && \ + (!defined(__ARM_ARCH_7M__) || !__ARM_ARCH_7M__) +#define uECC_ARM_USE_UMAAL 1 +#else +#define uECC_ARM_USE_UMAAL 0 +#endif +#endif + +#ifndef uECC_WORD_SIZE +#if uECC_PLATFORM == uECC_avr +#define uECC_WORD_SIZE 1 +#elif (uECC_PLATFORM == uECC_x86_64 || uECC_PLATFORM == uECC_arm64) +#define uECC_WORD_SIZE 8 +#else +#define uECC_WORD_SIZE 4 +#endif +#endif + +#if (uECC_WORD_SIZE != 1) && (uECC_WORD_SIZE != 4) && (uECC_WORD_SIZE != 8) +#error "Unsupported value for uECC_WORD_SIZE" +#endif + +#if ((uECC_PLATFORM == uECC_avr) && (uECC_WORD_SIZE != 1)) +#pragma message("uECC_WORD_SIZE must be 1 for AVR") +#undef uECC_WORD_SIZE +#define uECC_WORD_SIZE 1 +#endif + +#if ((uECC_PLATFORM == uECC_arm || uECC_PLATFORM == uECC_arm_thumb || \ + uECC_PLATFORM == 
uECC_arm_thumb2) && \ + (uECC_WORD_SIZE != 4)) +#pragma message("uECC_WORD_SIZE must be 4 for ARM") +#undef uECC_WORD_SIZE +#define uECC_WORD_SIZE 4 +#endif +/* Small signed helper types: wordcount_t holds word counts, bitcount_t holds bit counts and bit indices, cmpresult_t holds a comparison result. */ +typedef int8_t wordcount_t; +typedef int16_t bitcount_t; +typedef int8_t cmpresult_t; +/* Per word size: uECC_word_t is the integer type of one word, HIGH_BIT_SET its top bit, uECC_WORD_BITS its width in bits, uECC_WORD_BITS_SHIFT log2 of that width, uECC_WORD_BITS_MASK the width minus one. */ +#if (uECC_WORD_SIZE == 1) +/* 8-bit words; uECC_dword_t is twice as wide (16 bits). */ +typedef uint8_t uECC_word_t; +typedef uint16_t uECC_dword_t; + +#define HIGH_BIT_SET 0x80 +#define uECC_WORD_BITS 8 +#define uECC_WORD_BITS_SHIFT 3 +#define uECC_WORD_BITS_MASK 0x07 + +#elif (uECC_WORD_SIZE == 4) +/* 32-bit words; uECC_dword_t is twice as wide (64 bits). */ +typedef uint32_t uECC_word_t; +typedef uint64_t uECC_dword_t; + +#define HIGH_BIT_SET 0x80000000 +#define uECC_WORD_BITS 32 +#define uECC_WORD_BITS_SHIFT 5 +#define uECC_WORD_BITS_MASK 0x01F + +#elif (uECC_WORD_SIZE == 8) +/* 64-bit words. NOTE(review): this branch defines no uECC_dword_t - confirm the implementation does not need one when uECC_WORD_SIZE == 8. */ +typedef uint64_t uECC_word_t; + +#define HIGH_BIT_SET 0x8000000000000000U +#define uECC_WORD_BITS 64 +#define uECC_WORD_BITS_SHIFT 6 +#define uECC_WORD_BITS_MASK 0x03F + +#endif /* uECC_WORD_SIZE */ + +#endif /* _UECC_TYPES_H_ */