arch/arm/Kconfig | 4 +- arch/arm64/Kconfig | 3 +- .../boot/dts/rockchip/rk3399-pinebook-pro.dts | 6 +- arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 5 + arch/s390/kernel/setup.c | 4 + arch/x86/kernel/setup.c | 22 +- crypto/rng.c | 73 ++- drivers/acpi/apei/hest.c | 8 + drivers/acpi/irq.c | 17 +- drivers/acpi/scan.c | 9 + drivers/ata/libahci.c | 18 + drivers/char/ipmi/ipmi_dmi.c | 15 + drivers/char/ipmi/ipmi_msghandler.c | 16 +- drivers/char/random.c | 115 +++++ drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efi.c | 124 +++-- drivers/firmware/efi/secureboot.c | 38 ++ drivers/hid/hid-rmi.c | 64 --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 + drivers/input/rmi4/rmi_driver.c | 124 +++-- drivers/iommu/iommu.c | 22 + drivers/net/ethernet/intel/e1000e/e1000.h | 4 +- drivers/net/ethernet/intel/e1000e/ich8lan.c | 31 +- drivers/net/ethernet/intel/e1000e/ich8lan.h | 3 + drivers/net/ethernet/intel/e1000e/netdev.c | 29 +- drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c | 4 + drivers/net/wireguard/main.c | 6 + drivers/pci/quirks.c | 24 + drivers/usb/core/hub.c | 7 + fs/xfs/xfs_super.c | 8 - include/linux/efi.h | 22 +- include/linux/lsm_hook_defs.h | 2 + include/linux/lsm_hooks.h | 6 + include/linux/random.h | 7 + include/linux/rmi.h | 1 + include/linux/security.h | 5 + init/Kconfig | 2 +- kernel/module_signing.c | 9 +- mm/cma.c | 10 + security/integrity/platform_certs/load_uefi.c | 6 +- security/lockdown/Kconfig | 13 + security/lockdown/lockdown.c | 1 + security/security.c | 6 + tools/testing/selftests/bpf/Makefile | 1 - .../selftests/bpf/prog_tests/linked_funcs.c | 42 -- .../testing/selftests/bpf/prog_tests/linked_maps.c | 30 -- .../testing/selftests/bpf/prog_tests/linked_vars.c | 43 -- tools/testing/selftests/bpf/progs/bpf_cubic.c | 545 --------------------- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 224 --------- .../testing/selftests/bpf/progs/kfunc_call_test.c | 47 -- .../selftests/bpf/progs/kfunc_call_test_subprog.c | 42 -- 
tools/testing/selftests/bpf/progs/linked_funcs1.c | 73 --- tools/testing/selftests/bpf/progs/linked_funcs2.c | 73 --- tools/testing/selftests/bpf/progs/linked_maps2.c | 76 --- tools/testing/selftests/bpf/progs/linked_vars1.c | 54 -- tools/testing/selftests/bpf/progs/linked_vars2.c | 55 --- 57 files changed, 709 insertions(+), 1510 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2fb7012c3246..47718b4f2f75 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1474,9 +1474,9 @@ config HIGHMEM If unsure, say n. config HIGHPTE - bool "Allocate 2nd-level pagetables from highmem" if EXPERT + bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM - default y + default n help The VM uses one page of physical memory for each page table. For systems with a lot of processes, this can use a lot of diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 62c3c1d2190f..9ef6bb1d5b0c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -915,7 +915,7 @@ endchoice config ARM64_FORCE_52BIT bool "Force 52-bit virtual addresses for userspace" - depends on ARM64_VA_BITS_52 && EXPERT + depends on ARM64_VA_BITS_52 help For systems with 52-bit userspace VAs enabled, the kernel will attempt to maintain compatibility with older software by providing 48-bit VAs @@ -1156,6 +1156,7 @@ config XEN config FORCE_MAX_ZONEORDER int default "14" if ARM64_64K_PAGES + default "13" if (ARCH_THUNDER && !ARM64_64K_PAGES) default "12" if ARM64_16K_PAGES default "11" help diff --git a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts index 2b5f001ff4a6..dae8c252bc2b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-pinebook-pro.dts @@ -385,10 +385,6 @@ mains_charger: dc-charger { }; }; -&cdn_dp { - status = "okay"; -}; - &cpu_b0 { cpu-supply = <&vdd_cpu_b>; }; @@ -711,7 +707,7 @@ fusb0: fusb30x@22 { connector { compatible = "usb-c-connector"; - 
data-role = "host"; + data-role = "dual"; label = "USB-C"; op-sink-microwatt = <1000000>; power-role = "dual"; diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a9e2c7295b35..6ff11f3a2d47 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -127,6 +127,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, unsigned char flags, unsigned short cert); int ipl_report_add_certificate(struct ipl_report *report, void *key, unsigned long addr, unsigned long len); +bool ipl_get_secureboot(void); /* * DIAG 308 support diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 911cd3912351..caae31c04561 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2215,3 +2215,8 @@ int ipl_report_free(struct ipl_report *report) } #endif + +bool ipl_get_secureboot(void) +{ + return !!ipl_secure_flag; +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 6f0d2d4dea74..2f4d60b24155 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1034,6 +1035,9 @@ void __init setup_arch(char **cmdline_p) log_component_list(); + if (ipl_get_secureboot()) + security_lock_kernel_down("Secure IPL mode", LOCKDOWN_INTEGRITY_MAX); + /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ *cmdline_p = boot_command_line; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d103e8489ec1..16ef6bcceed2 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -938,6 +939,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); + efi_set_secure_boot(boot_params.secure_boot); + +#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT + if (efi_enabled(EFI_SECURE_BOOT)) + security_lock_kernel_down("EFI 
Secure Boot mode", LOCKDOWN_INTEGRITY_MAX); +#endif + dmi_setup(); /* @@ -1103,19 +1111,7 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); - if (efi_enabled(EFI_BOOT)) { - switch (boot_params.secure_boot) { - case efi_secureboot_mode_disabled: - pr_info("Secure boot disabled\n"); - break; - case efi_secureboot_mode_enabled: - pr_info("Secure boot enabled\n"); - break; - default: - pr_info("Secure boot could not be determined\n"); - break; - } - } + efi_set_secure_boot(boot_params.secure_boot); reserve_initrd(); diff --git a/crypto/rng.c b/crypto/rng.c index fea082b25fe4..50a9d040bed1 100644 --- a/crypto/rng.c +++ b/crypto/rng.c @@ -11,14 +11,17 @@ #include #include #include +#include +#include #include #include #include #include +#include +#include #include #include #include -#include #include #include "internal.h" @@ -224,5 +227,73 @@ void crypto_unregister_rngs(struct rng_alg *algs, int count) } EXPORT_SYMBOL_GPL(crypto_unregister_rngs); +static ssize_t crypto_devrandom_read(void __user *buf, size_t buflen) +{ + u8 tmp[256]; + ssize_t ret; + + if (!buflen) + return 0; + + ret = crypto_get_default_rng(); + if (ret) + return ret; + + for (;;) { + int err; + size_t i; /* chunk length; kept as size_t so a large buflen is never truncated or turned negative */ + + i = min_t(size_t, buflen, sizeof(tmp)); + err = crypto_rng_get_bytes(crypto_default_rng, tmp, i); + if (err) { + ret = err; + break; + } + + if (copy_to_user(buf, tmp, i)) { + ret = -EFAULT; + break; + } + + buflen -= i; + buf += i; + ret += i; + + if (!buflen) + break; + + if (need_resched()) { + if (signal_pending(current)) + break; + schedule(); + } + } + + crypto_put_default_rng(); + memzero_explicit(tmp, sizeof(tmp)); + + return ret; +} + +static const struct random_extrng crypto_devrandom_rng = { + .extrng_read = crypto_devrandom_read, + .owner = THIS_MODULE, +}; + +static int __init crypto_rng_init(void) +{ + if (fips_enabled) + random_register_extrng(&crypto_devrandom_rng); + return 0; +} + +static void __exit crypto_rng_exit(void) +{ + 
random_unregister_extrng(); +} + +late_initcall(crypto_rng_init); +module_exit(crypto_rng_exit); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Random Number Generator"); diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 277f00b288d1..adbce15c273d 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -94,6 +94,14 @@ int apei_hest_parse(apei_hest_func_t func, void *data) if (hest_disable || !hest_tab) return -EINVAL; +#ifdef CONFIG_ARM64 + /* Ignore broken firmware */ + if (!strncmp(hest_tab->header.oem_id, "HPE ", 6) && + !strncmp(hest_tab->header.oem_table_id, "ProLiant", 8) && + MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_APM) + return -EINVAL; +#endif + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); for (i = 0; i < hest_tab->error_source_count; i++) { len = hest_esrc_len(hest_hdr); diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index c68e694fca26..146cba5ae5bc 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -130,6 +130,7 @@ struct acpi_irq_parse_one_ctx { unsigned int index; unsigned long *res_flags; struct irq_fwspec *fwspec; + bool skip_producer_check; }; /** @@ -201,7 +202,8 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, return AE_CTRL_TERMINATE; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: eirq = &ares->data.extended_irq; - if (eirq->producer_consumer == ACPI_PRODUCER) + if (!ctx->skip_producer_check && + eirq->producer_consumer == ACPI_PRODUCER) return AE_OK; if (ctx->index >= eirq->interrupt_count) { ctx->index -= eirq->interrupt_count; @@ -236,8 +238,19 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, static int acpi_irq_parse_one(acpi_handle handle, unsigned int index, struct irq_fwspec *fwspec, unsigned long *flags) { - struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec }; + struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec, false }; + /* + * Firmware on arm64-based HPE m400 platform incorrectly marks + * its 
UART interrupt as ACPI_PRODUCER rather than ACPI_CONSUMER. + * Don't do the producer/consumer check for that device. + */ + if (IS_ENABLED(CONFIG_ARM64)) { + struct acpi_device *adev = acpi_bus_get_acpi_device(handle); + + if (adev && !strcmp(acpi_device_hid(adev), "APMC0D08")) + ctx.skip_producer_check = true; + } acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_irq_parse_one_cb, &ctx); return ctx.rc; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b24513ec3fae..8308569f66e0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1721,6 +1721,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) if (!acpi_match_device_ids(device, i2c_multi_instantiate_ids)) return false; + /* + * Firmware on some arm64 X-Gene platforms will make the UART + * device appear as both a UART and a slave of that UART. Just + * bail out here for X-Gene UARTs. + */ + if (IS_ENABLED(CONFIG_ARM64) && + !strcmp(acpi_device_hid(device), "APMC0D08")) + return false; + INIT_LIST_HEAD(&resource_list); acpi_dev_get_resources(device, &resource_list, acpi_check_serial_bus_slave, diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fec2e9754aed..bea4e2973259 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -671,6 +671,24 @@ int ahci_stop_engine(struct ata_port *ap) tmp &= ~PORT_CMD_START; writel(tmp, port_mmio + PORT_CMD); +#ifdef CONFIG_ARM64 + /* Rev Ax of Cavium CN99XX needs a hack for port stop */ + if (dev_is_pci(ap->host->dev) && + to_pci_dev(ap->host->dev)->vendor == 0x14e4 && + to_pci_dev(ap->host->dev)->device == 0x9027 && + midr_is_cpu_model_range(read_cpuid_id(), + MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN), + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) { + tmp = readl(hpriv->mmio + 0x8000); + udelay(100); + writel(tmp | (1 << 26), hpriv->mmio + 0x8000); + udelay(100); + writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000); + dev_warn(ap->host->dev, "CN99XX SATA reset workaround 
applied\n"); + } +#endif + /* wait for engine to stop. This could be as long as 500 msec */ tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index bbf7029e224b..cf7faa970dd6 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -215,6 +215,21 @@ static int __init scan_for_dmi_ipmi(void) { const struct dmi_device *dev = NULL; +#ifdef CONFIG_ARM64 + /* RHEL-only + * If this is ARM-based HPE m400, return now, because that platform + * reports the host-side ipmi address as intel port-io space, which + * does not exist in the ARM architecture. + */ + const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); + + if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { + pr_debug("%s does not support host ipmi\n", dmistr); + return 0; + } + /* END RHEL-only */ +#endif + while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) dmi_decode_ipmi((const struct dmi_header *) dev->device_data); diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e96cb5c4f97a..d645460fe8a9 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #define IPMI_DRIVER_VERSION "39.2" @@ -5160,8 +5161,21 @@ static int __init ipmi_init_msghandler_mod(void) { int rv; - pr_info("version " IPMI_DRIVER_VERSION "\n"); +#ifdef CONFIG_ARM64 + /* RHEL-only + * If this is ARM-based HPE m400, return now, because that platform + * reports the host-side ipmi address as intel port-io space, which + * does not exist in the ARM architecture. 
+ */ + const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); + if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { + pr_debug("%s does not support host ipmi\n", dmistr); + return -ENOSYS; + } + /* END RHEL-only */ +#endif + pr_info("version " IPMI_DRIVER_VERSION "\n"); mutex_lock(&ipmi_interfaces_mutex); rv = ipmi_register_driver(); mutex_unlock(&ipmi_interfaces_mutex); diff --git a/drivers/char/random.c b/drivers/char/random.c index 605969ed0f96..4d51f1c67675 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -335,6 +335,7 @@ #include #include #include +#include #include #include @@ -349,6 +350,11 @@ /* #define ADD_INTERRUPT_BENCH */ +/* + * Hook for external RNG. + */ +static const struct random_extrng __rcu *extrng; + /* * Configuration information */ @@ -481,6 +487,9 @@ static int ratelimit_disable __read_mostly; module_param_named(ratelimit_disable, ratelimit_disable, int, 0644); MODULE_PARM_DESC(ratelimit_disable, "Disable random ratelimit suppression"); +static const struct file_operations extrng_random_fops; +static const struct file_operations extrng_urandom_fops; + /********************************************************************** * * OS independent entropy store. 
Here are the functions which handle @@ -1858,6 +1867,13 @@ random_poll(struct file *file, poll_table * wait) return mask; } +static __poll_t +extrng_poll(struct file *file, poll_table * wait) +{ + /* extrng pool is always full, always read, no writes */ + return EPOLLIN | EPOLLRDNORM; +} + static int write_pool(struct entropy_store *r, const char __user *buffer, size_t count) { @@ -1961,7 +1977,58 @@ static int random_fasync(int fd, struct file *filp, int on) return fasync_helper(fd, filp, on, &fasync); } +static int random_open(struct inode *inode, struct file *filp) +{ + const struct random_extrng *rng; + + rcu_read_lock(); + rng = rcu_dereference(extrng); + if (rng && !try_module_get(rng->owner)) + rng = NULL; + rcu_read_unlock(); + + if (!rng) + return 0; + + filp->f_op = &extrng_random_fops; + filp->private_data = rng->owner; + + return 0; +} + +static int urandom_open(struct inode *inode, struct file *filp) +{ + const struct random_extrng *rng; + + rcu_read_lock(); + rng = rcu_dereference(extrng); + if (rng && !try_module_get(rng->owner)) + rng = NULL; + rcu_read_unlock(); + + if (!rng) + return 0; + + filp->f_op = &extrng_urandom_fops; + filp->private_data = rng->owner; + + return 0; +} + +static int extrng_release(struct inode *inode, struct file *filp) +{ + module_put(filp->private_data); + return 0; +} + +static ssize_t +extrng_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) +{ + return rcu_dereference_raw(extrng)->extrng_read(buf, nbytes); +} + const struct file_operations random_fops = { + .open = random_open, .read = random_read, .write = random_write, .poll = random_poll, @@ -1972,6 +2039,7 @@ const struct file_operations random_fops = { }; const struct file_operations urandom_fops = { + .open = urandom_open, .read = urandom_read, .write = random_write, .unlocked_ioctl = random_ioctl, @@ -1980,9 +2048,31 @@ const struct file_operations urandom_fops = { .llseek = noop_llseek, }; +static const struct file_operations 
extrng_random_fops = { + .open = random_open, + .read = extrng_read, + .write = random_write, + .poll = extrng_poll, + .unlocked_ioctl = random_ioctl, + .fasync = random_fasync, + .llseek = noop_llseek, + .release = extrng_release, +}; + +static const struct file_operations extrng_urandom_fops = { + .open = urandom_open, + .read = extrng_read, + .write = random_write, + .unlocked_ioctl = random_ioctl, + .fasync = random_fasync, + .llseek = noop_llseek, + .release = extrng_release, +}; + SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) { + const struct random_extrng *rng; int ret; if (flags & ~(GRND_NONBLOCK|GRND_RANDOM|GRND_INSECURE)) @@ -1998,6 +2088,18 @@ SYSCALL_DEFINE3(getrandom, char __user *, buf, size_t, count, if (count > INT_MAX) count = INT_MAX; + rcu_read_lock(); + rng = rcu_dereference(extrng); + if (rng && !try_module_get(rng->owner)) + rng = NULL; + rcu_read_unlock(); + + if (rng) { + ret = rng->extrng_read(buf, count); + module_put(rng->owner); + return ret; + } + if (!(flags & GRND_INSECURE) && !crng_ready()) { if (flags & GRND_NONBLOCK) return -EAGAIN; @@ -2303,3 +2405,16 @@ void add_bootloader_randomness(const void *buf, unsigned int size) add_device_randomness(buf, size); } EXPORT_SYMBOL_GPL(add_bootloader_randomness); + +void random_register_extrng(const struct random_extrng *rng) +{ + rcu_assign_pointer(extrng, rng); +} +EXPORT_SYMBOL_GPL(random_register_extrng); + +void random_unregister_extrng(void) +{ + RCU_INIT_POINTER(extrng, NULL); + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(random_unregister_extrng); diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 467e94259679..9b6f5b8e5397 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o obj-$(CONFIG_EFI_TEST) += test/ obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o +obj-$(CONFIG_EFI) += 
secureboot.o obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 847f33ffc4ae..363037f8eaf8 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -841,40 +842,101 @@ int efi_mem_type(unsigned long phys_addr) } #endif +struct efi_error_code { + efi_status_t status; + int errno; + const char *description; +}; + +static const struct efi_error_code efi_error_codes[] = { + { EFI_SUCCESS, 0, "Success"}, +#if 0 + { EFI_LOAD_ERROR, -EPICK_AN_ERRNO, "Load Error"}, +#endif + { EFI_INVALID_PARAMETER, -EINVAL, "Invalid Parameter"}, + { EFI_UNSUPPORTED, -ENOSYS, "Unsupported"}, + { EFI_BAD_BUFFER_SIZE, -ENOSPC, "Bad Buffer Size"}, + { EFI_BUFFER_TOO_SMALL, -ENOSPC, "Buffer Too Small"}, + { EFI_NOT_READY, -EAGAIN, "Not Ready"}, + { EFI_DEVICE_ERROR, -EIO, "Device Error"}, + { EFI_WRITE_PROTECTED, -EROFS, "Write Protected"}, + { EFI_OUT_OF_RESOURCES, -ENOMEM, "Out of Resources"}, +#if 0 + { EFI_VOLUME_CORRUPTED, -EPICK_AN_ERRNO, "Volume Corrupt"}, + { EFI_VOLUME_FULL, -EPICK_AN_ERRNO, "Volume Full"}, + { EFI_NO_MEDIA, -EPICK_AN_ERRNO, "No Media"}, + { EFI_MEDIA_CHANGED, -EPICK_AN_ERRNO, "Media changed"}, +#endif + { EFI_NOT_FOUND, -ENOENT, "Not Found"}, +#if 0 + { EFI_ACCESS_DENIED, -EPICK_AN_ERRNO, "Access Denied"}, + { EFI_NO_RESPONSE, -EPICK_AN_ERRNO, "No Response"}, + { EFI_NO_MAPPING, -EPICK_AN_ERRNO, "No mapping"}, + { EFI_TIMEOUT, -EPICK_AN_ERRNO, "Time out"}, + { EFI_NOT_STARTED, -EPICK_AN_ERRNO, "Not started"}, + { EFI_ALREADY_STARTED, -EPICK_AN_ERRNO, "Already started"}, +#endif + { EFI_ABORTED, -EINTR, "Aborted"}, +#if 0 + { EFI_ICMP_ERROR, -EPICK_AN_ERRNO, "ICMP Error"}, + { EFI_TFTP_ERROR, -EPICK_AN_ERRNO, "TFTP Error"}, + { EFI_PROTOCOL_ERROR, -EPICK_AN_ERRNO, "Protocol Error"}, + { 
EFI_INCOMPATIBLE_VERSION, -EPICK_AN_ERRNO, "Incompatible Version"}, +#endif + { EFI_SECURITY_VIOLATION, -EACCES, "Security Policy Violation"}, +#if 0 + { EFI_CRC_ERROR, -EPICK_AN_ERRNO, "CRC Error"}, + { EFI_END_OF_MEDIA, -EPICK_AN_ERRNO, "End of Media"}, + { EFI_END_OF_FILE, -EPICK_AN_ERRNO, "End of File"}, + { EFI_INVALID_LANGUAGE, -EPICK_AN_ERRNO, "Invalid Languages"}, + { EFI_COMPROMISED_DATA, -EPICK_AN_ERRNO, "Compromised Data"}, + + // warnings + { EFI_WARN_UNKOWN_GLYPH, -EPICK_AN_ERRNO, "Warning Unknown Glyph"}, + { EFI_WARN_DELETE_FAILURE, -EPICK_AN_ERRNO, "Warning Delete Failure"}, + { EFI_WARN_WRITE_FAILURE, -EPICK_AN_ERRNO, "Warning Write Failure"}, + { EFI_WARN_BUFFER_TOO_SMALL, -EPICK_AN_ERRNO, "Warning Buffer Too Small"}, +#endif +}; + +static int +efi_status_cmp_bsearch(const void *key, const void *item) +{ + u64 status = (u64)(uintptr_t)key; + const struct efi_error_code *code = item; + + if (status < code->status) + return -1; + if (status > code->status) + return 1; + return 0; +} + int efi_status_to_err(efi_status_t status) { - int err; - - switch (status) { - case EFI_SUCCESS: - err = 0; - break; - case EFI_INVALID_PARAMETER: - err = -EINVAL; - break; - case EFI_OUT_OF_RESOURCES: - err = -ENOSPC; - break; - case EFI_DEVICE_ERROR: - err = -EIO; - break; - case EFI_WRITE_PROTECTED: - err = -EROFS; - break; - case EFI_SECURITY_VIOLATION: - err = -EACCES; - break; - case EFI_NOT_FOUND: - err = -ENOENT; - break; - case EFI_ABORTED: - err = -EINTR; - break; - default: - err = -EINVAL; - } + const struct efi_error_code *found; + size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); - return err; + found = bsearch((void *)(uintptr_t)status, efi_error_codes, + num, sizeof(struct efi_error_code), /* bsearch(): element count first, then element size */ + efi_status_cmp_bsearch); + if (!found) + return -EINVAL; + return found->errno; +} + +const char * +efi_status_to_str(efi_status_t status) +{ + const struct efi_error_code *found; + size_t num = sizeof(efi_error_codes) / sizeof(struct 
efi_error_code); + + found = bsearch((void *)(uintptr_t)status, efi_error_codes, + num, sizeof(struct efi_error_code), /* bsearch(): element count first, then element size */ + efi_status_cmp_bsearch); + if (!found) + return "Unknown error code"; + return found->description; } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); diff --git a/drivers/firmware/efi/secureboot.c b/drivers/firmware/efi/secureboot.c new file mode 100644 index 000000000000..de0a3714a5d4 --- /dev/null +++ b/drivers/firmware/efi/secureboot.c @@ -0,0 +1,38 @@ +/* Core kernel secure boot support. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +/* + * Decide what to do when UEFI secure boot mode is enabled. 
+ */ +void __init efi_set_secure_boot(enum efi_secureboot_mode mode) +{ + if (efi_enabled(EFI_BOOT)) { + switch (mode) { + case efi_secureboot_mode_disabled: + pr_info("Secure boot disabled\n"); + break; + case efi_secureboot_mode_enabled: + set_bit(EFI_SECURE_BOOT, &efi.flags); + pr_info("Secure boot enabled\n"); + break; + default: + pr_warn("Secure boot could not be determined (mode %u)\n", + mode); + break; + } + } +} diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 311eee599ce9..2460c6bd46f8 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -322,19 +322,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size) { struct rmi_data *hdata = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = hdata->xport.rmi_dev; - unsigned long flags; if (!(test_bit(RMI_STARTED, &hdata->flags))) return 0; - local_irq_save(flags); - rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2); - generic_handle_irq(hdata->rmi_irq); - - local_irq_restore(flags); - return 1; } @@ -591,56 +584,6 @@ static const struct rmi_transport_ops hid_rmi_ops = { .reset = rmi_hid_reset, }; -static void rmi_irq_teardown(void *data) -{ - struct rmi_data *hdata = data; - struct irq_domain *domain = hdata->domain; - - if (!domain) - return; - - irq_dispose_mapping(irq_find_mapping(domain, 0)); - - irq_domain_remove(domain); - hdata->domain = NULL; - hdata->rmi_irq = 0; -} - -static int rmi_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw_irq_num) -{ - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); - - return 0; -} - -static const struct irq_domain_ops rmi_irq_ops = { - .map = rmi_irq_map, -}; - -static int rmi_setup_irq_domain(struct hid_device *hdev) -{ - struct rmi_data *hdata = hid_get_drvdata(hdev); - int ret; - - hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1, - &rmi_irq_ops, hdata); - if (!hdata->domain) - return -ENOMEM; - - ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, 
hdata); - if (ret) - return ret; - - hdata->rmi_irq = irq_create_mapping(hdata->domain, 0); - if (hdata->rmi_irq <= 0) { - hid_err(hdev, "Can't allocate an IRQ\n"); - return hdata->rmi_irq < 0 ? hdata->rmi_irq : -ENXIO; - } - - return 0; -} - static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct rmi_data *data = NULL; @@ -713,18 +656,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) mutex_init(&data->page_mutex); - ret = rmi_setup_irq_domain(hdev); - if (ret) { - hid_err(hdev, "failed to allocate IRQ domain\n"); - return ret; - } - if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) rmi_hid_pdata.gpio_data.disable = true; data->xport.dev = hdev->dev.parent; data->xport.pdata = rmi_hid_pdata; - data->xport.pdata.irq = data->rmi_irq; data->xport.proto_name = "hid"; data->xport.ops = &hid_rmi_ops; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index da27cd4a3c38..5404b4c407c5 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2071,6 +2072,16 @@ static const struct amba_id etm4_ids[] = { {}, }; +static const struct dmi_system_id broken_coresight[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HPE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Apollo 70"), + }, + }, + { } /* terminating entry */ +}; + MODULE_DEVICE_TABLE(amba, etm4_ids); static struct amba_driver etm4x_amba_driver = { @@ -2104,6 +2115,11 @@ static int __init etm4x_init(void) { int ret; + if (dmi_check_system(broken_coresight)) { + pr_info("ETM4 disabled due to firmware bug\n"); + return 0; + } + ret = etm4_pm_setup(); /* etm4_pm_setup() does its own cleanup - exit on error */ @@ -2130,6 +2146,9 @@ static int __init etm4x_init(void) static void __exit etm4x_exit(void) { + if (dmi_check_system(broken_coresight)) + return; + 
amba_driver_unregister(&etm4x_amba_driver); platform_driver_unregister(&etm4_platform_driver); etm4_pm_clear(); diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395..f7298e3dc8f3 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -182,34 +182,47 @@ void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status, attn_data.data = fifo_data; kfifo_put(&drvdata->attn_fifo, attn_data); + + schedule_work(&drvdata->attn_work); } EXPORT_SYMBOL_GPL(rmi_set_attn_data); -static irqreturn_t rmi_irq_fn(int irq, void *dev_id) +static void attn_callback(struct work_struct *work) { - struct rmi_device *rmi_dev = dev_id; - struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); + struct rmi_driver_data *drvdata = container_of(work, + struct rmi_driver_data, + attn_work); struct rmi4_attn_data attn_data = {0}; int ret, count; count = kfifo_get(&drvdata->attn_fifo, &attn_data); - if (count) { - *(drvdata->irq_status) = attn_data.irq_status; - drvdata->attn_data = attn_data; - } + if (!count) + return; - ret = rmi_process_interrupt_requests(rmi_dev); + *(drvdata->irq_status) = attn_data.irq_status; + drvdata->attn_data = attn_data; + + ret = rmi_process_interrupt_requests(drvdata->rmi_dev); if (ret) - rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, + rmi_dbg(RMI_DEBUG_CORE, &drvdata->rmi_dev->dev, "Failed to process interrupt request: %d\n", ret); - if (count) { - kfree(attn_data.data); - drvdata->attn_data.data = NULL; - } + kfree(attn_data.data); + drvdata->attn_data.data = NULL; if (!kfifo_is_empty(&drvdata->attn_fifo)) - return rmi_irq_fn(irq, dev_id); + schedule_work(&drvdata->attn_work); +} + +static irqreturn_t rmi_irq_fn(int irq, void *dev_id) +{ + struct rmi_device *rmi_dev = dev_id; + int ret; + + ret = rmi_process_interrupt_requests(rmi_dev); + if (ret) + rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, + "Failed to process interrupt request: %d\n", ret); return IRQ_HANDLED; } @@ -217,7 
+230,6 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) static int rmi_irq_init(struct rmi_device *rmi_dev) { struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); - struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); int irq_flags = irq_get_trigger_type(pdata->irq); int ret; @@ -235,8 +247,6 @@ static int rmi_irq_init(struct rmi_device *rmi_dev) return ret; } - data->enabled = true; - return 0; } @@ -886,23 +896,27 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) if (data->enabled) goto out; - enable_irq(irq); - data->enabled = true; - if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { - retval = disable_irq_wake(irq); - if (retval) - dev_warn(&rmi_dev->dev, - "Failed to disable irq for wake: %d\n", - retval); - } + if (irq) { + enable_irq(irq); + data->enabled = true; + if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = disable_irq_wake(irq); + if (retval) + dev_warn(&rmi_dev->dev, + "Failed to disable irq for wake: %d\n", + retval); + } - /* - * Call rmi_process_interrupt_requests() after enabling irq, - * otherwise we may lose interrupt on edge-triggered systems. - */ - irq_flags = irq_get_trigger_type(pdata->irq); - if (irq_flags & IRQ_TYPE_EDGE_BOTH) - rmi_process_interrupt_requests(rmi_dev); + /* + * Call rmi_process_interrupt_requests() after enabling irq, + * otherwise we may lose interrupt on edge-triggered systems. 
+ */ + irq_flags = irq_get_trigger_type(pdata->irq); + if (irq_flags & IRQ_TYPE_EDGE_BOTH) + rmi_process_interrupt_requests(rmi_dev); + } else { + data->enabled = true; + } out: mutex_unlock(&data->enabled_mutex); @@ -922,20 +936,22 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake) goto out; data->enabled = false; - disable_irq(irq); - if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { - retval = enable_irq_wake(irq); - if (retval) - dev_warn(&rmi_dev->dev, - "Failed to enable irq for wake: %d\n", - retval); - } - - /* make sure the fifo is clean */ - while (!kfifo_is_empty(&data->attn_fifo)) { - count = kfifo_get(&data->attn_fifo, &attn_data); - if (count) - kfree(attn_data.data); + if (irq) { + disable_irq(irq); + if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = enable_irq_wake(irq); + if (retval) + dev_warn(&rmi_dev->dev, + "Failed to enable irq for wake: %d\n", + retval); + } + } else { + /* make sure the fifo is clean */ + while (!kfifo_is_empty(&data->attn_fifo)) { + count = kfifo_get(&data->attn_fifo, &attn_data); + if (count) + kfree(attn_data.data); + } } out: @@ -981,6 +997,8 @@ static int rmi_driver_remove(struct device *dev) irq_domain_remove(data->irqdomain); data->irqdomain = NULL; + cancel_work_sync(&data->attn_work); + rmi_f34_remove_sysfs(rmi_dev); rmi_free_function_list(rmi_dev); @@ -1219,9 +1237,15 @@ static int rmi_driver_probe(struct device *dev) } } - retval = rmi_irq_init(rmi_dev); - if (retval < 0) - goto err_destroy_functions; + if (pdata->irq) { + retval = rmi_irq_init(rmi_dev); + if (retval < 0) + goto err_destroy_functions; + } + + data->enabled = true; + + INIT_WORK(&data->attn_work, attn_callback); if (data->f01_container->dev.driver) { /* Driver already bound, so enable ATTN now. 
*/ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 63f0af10c403..195be16dbd39 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "iommu: " fmt #include +#include #include #include #include @@ -3039,6 +3040,27 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); +#ifdef CONFIG_ARM64 +static int __init iommu_quirks(void) +{ + const char *vendor, *name; + + vendor = dmi_get_system_info(DMI_SYS_VENDOR); + name = dmi_get_system_info(DMI_PRODUCT_NAME); + + if (vendor && + (strncmp(vendor, "GIGABYTE", 8) == 0 && name && + (strncmp(name, "R120", 4) == 0 || + strncmp(name, "R270", 4) == 0))) { + pr_warn("Gigabyte %s detected, force iommu passthrough mode", name); + iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; + } + + return 0; +} +arch_initcall(iommu_quirks); +#endif + /* * Changes the default domain of an iommu group that has *only* one device * diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 5b2143f4b1f8..3178efd98006 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -113,7 +113,8 @@ enum e1000_boards { board_pch2lan, board_pch_lpt, board_pch_spt, - board_pch_cnp + board_pch_cnp, + board_pch_tgp }; struct e1000_ps_page { @@ -499,6 +500,7 @@ extern const struct e1000_info e1000_pch2_info; extern const struct e1000_info e1000_pch_lpt_info; extern const struct e1000_info e1000_pch_spt_info; extern const struct e1000_info e1000_pch_cnp_info; +extern const struct e1000_info e1000_pch_tgp_info; extern const struct e1000_info e1000_es2_info; void e1000e_ptp_init(struct e1000_adapter *adapter); diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index a80336c4319b..f8b3e758a8d2 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -4804,7 +4804,7 @@ static 
s32 e1000_reset_hw_ich8lan(struct e1000_hw *hw) static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) { struct e1000_mac_info *mac = &hw->mac; - u32 ctrl_ext, txdctl, snoop; + u32 ctrl_ext, txdctl, snoop, fflt_dbg; s32 ret_val; u16 i; @@ -4863,6 +4863,15 @@ static s32 e1000_init_hw_ich8lan(struct e1000_hw *hw) snoop = (u32)~(PCIE_NO_SNOOP_ALL); e1000e_set_pcie_no_snoop(hw, snoop); + /* Enable workaround for packet loss issue on TGP PCH + * Do not gate DMA clock from the modPHY block + */ + if (mac->type >= e1000_pch_tgp) { + fflt_dbg = er32(FFLT_DBG); + fflt_dbg |= E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK; + ew32(FFLT_DBG, fflt_dbg); + } + ctrl_ext = er32(CTRL_EXT); ctrl_ext |= E1000_CTRL_EXT_RO_DIS; ew32(CTRL_EXT, ctrl_ext); @@ -5983,3 +5992,23 @@ const struct e1000_info e1000_pch_cnp_info = { .phy_ops = &ich8_phy_ops, .nvm_ops = &spt_nvm_ops, }; + +const struct e1000_info e1000_pch_tgp_info = { + .mac = e1000_pch_tgp, + .flags = FLAG_IS_ICH + | FLAG_HAS_WOL + | FLAG_HAS_HW_TIMESTAMP + | FLAG_HAS_CTRLEXT_ON_LOAD + | FLAG_HAS_AMT + | FLAG_HAS_FLASH + | FLAG_HAS_JUMBO_FRAMES + | FLAG_APME_IN_WUC, + .flags2 = FLAG2_HAS_PHY_STATS + | FLAG2_HAS_EEE, + .pba = 26, + .max_hw_frame_size = 9022, + .get_variants = e1000_get_variants_ich8lan, + .mac_ops = &ich8_mac_ops, + .phy_ops = &ich8_phy_ops, + .nvm_ops = &spt_nvm_ops, +}; diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.h b/drivers/net/ethernet/intel/e1000e/ich8lan.h index e757896287eb..8f2a8f4ce0ee 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.h +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.h @@ -286,6 +286,9 @@ /* Proprietary Latency Tolerance Reporting PCI Capability */ #define E1000_PCI_LTR_CAP_LPT 0xA8 +/* Don't gate wake DMA clock */ +#define E1000_FFLT_DBG_DONT_GATE_WAKE_DMA_CLK 0x1000 + void e1000e_write_protect_nvm_ich8lan(struct e1000_hw *hw); void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw, bool state); diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c 
b/drivers/net/ethernet/intel/e1000e/netdev.c index 757a54c39eef..774f849027f0 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -51,6 +51,7 @@ static const struct e1000_info *e1000_info_tbl[] = { [board_pch_lpt] = &e1000_pch_lpt_info, [board_pch_spt] = &e1000_pch_spt_info, [board_pch_cnp] = &e1000_pch_cnp_info, + [board_pch_tgp] = &e1000_pch_tgp_info, }; struct e1000_reg_info { @@ -7844,20 +7845,20 @@ static const struct pci_device_id e1000_pci_tbl[] = { { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V11), board_pch_cnp }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_LM12), board_pch_spt }, { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_CMP_I219_V12), board_pch_spt }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_cnp }, - { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_cnp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V13), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, 
E1000_DEV_ID_PCH_TGP_I219_V14), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_LM15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_TGP_I219_V15), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V16), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_LM17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_ADP_I219_V17), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V18), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_LM19), board_pch_tgp }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_PCH_MTP_I219_V19), board_pch_tgp }, { 0, 0, 0, 0, 0, 0, 0 } /* terminate list */ }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 6924a6aacbd5..60461ff4deae 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -1529,6 +1529,7 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) return ret; } + pm_runtime_enable(dev); pm_runtime_get_sync(dev); if (bsp_priv->integrated_phy) @@ -1539,10 +1540,13 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) static void rk_gmac_powerdown(struct rk_priv_data *gmac) { + struct device *dev = &gmac->pdev->dev; + if (gmac->integrated_phy) rk_gmac_integrated_phy_powerdown(gmac); pm_runtime_put_sync(&gmac->pdev->dev); + pm_runtime_disable(dev); phy_power_on(gmac, false); gmac_clk_enable(gmac, false); diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 75dbe77b0b4b..4bd6dd722f44 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -12,6 +12,7 @@ #include +#include #include #include #include @@ -21,6 +22,11 @@ static int __init mod_init(void) { int ret; +#ifdef CONFIG_RHEL_DIFFERENCES + if (fips_enabled) + return 
-EOPNOTSUPP; +#endif + ret = wg_allowedips_slab_init(); if (ret < 0) goto err_allowedips; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8c3c1ef92171..18431236ab9f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4232,6 +4232,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084, quirk_bridge_cavm_thrx2_pcie_root); +/* + * PCI BAR 5 is not setup correctly for the on-board AHCI controller + * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by + * using BAR 4's resources which are populated correctly and NOT + * actually used by the AHCI controller. + */ +static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev) +{ + struct resource *r = &dev->resource[4]; + + if (!(r->flags & IORESOURCE_MEM) || (r->start == 0)) + return; + + /* Set BAR5 resource to BAR4 */ + dev->resource[5] = *r; + + /* Update BAR5 in pci config space */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start); + + /* Clear BAR4's resource */ + memset(r, 0, sizeof(*r)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars); + /* * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero) * class code. Fix it. diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 86658a81d284..5647f4756e97 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5657,6 +5657,13 @@ static void hub_event(struct work_struct *work) (u16) hub->change_bits[0], (u16) hub->event_bits[0]); + /* Don't disconnect USB-SATA on TrimSlice */ + if (strcmp(dev_name(hdev->bus->controller), "tegra-ehci.0") == 0) { + if ((hdev->state == 7) && (hub->change_bits[0] == 0) && + (hub->event_bits[0] == 0x2)) + hub->event_bits[0] = 0; + } + /* Lock the device, then check to see if we were * disconnected while waiting for the lock to succeed. 
*/ usb_lock_device(hdev); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 2c9e26a44546..f96324912d6a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1539,10 +1539,6 @@ xfs_fs_fill_super( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) sb->s_flags |= SB_I_VERSION; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL big timestamp feature in use. Use at your own risk!"); - if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) { bool rtdev_is_dax = false, datadev_is_dax; @@ -1598,10 +1594,6 @@ xfs_fs_fill_super( goto out_filestream_unmount; } - if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; diff --git a/include/linux/efi.h b/include/linux/efi.h index 6b5d36babfcc..fd4a5d66a9d0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -43,6 +43,8 @@ #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) +#define EFI_IS_ERROR(x) ((x) & (1UL << (BITS_PER_LONG-1))) + typedef unsigned long efi_status_t; typedef u8 efi_bool_t; typedef u16 efi_char16_t; /* UNICODE character */ @@ -782,6 +784,14 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ #define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ +#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode? 
*/ + +enum efi_secureboot_mode { + efi_secureboot_mode_unset, + efi_secureboot_mode_unknown, + efi_secureboot_mode_disabled, + efi_secureboot_mode_enabled, +}; #ifdef CONFIG_EFI /* @@ -793,6 +803,8 @@ static inline bool efi_enabled(int feature) } extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); +extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); + bool __pure __efi_soft_reserve_enabled(void); static inline bool __pure efi_soft_reserve_enabled(void) @@ -813,6 +825,8 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} +static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} + static inline bool efi_soft_reserve_enabled(void) { return false; @@ -825,6 +839,7 @@ static inline bool efi_rt_services_supported(unsigned int mask) #endif extern int efi_status_to_err(efi_status_t status); +extern const char *efi_status_to_str(efi_status_t status); /* * Variable Attributes @@ -1077,13 +1092,6 @@ static inline bool efi_runtime_disabled(void) { return true; } extern void efi_call_virt_check_flags(unsigned long flags, const char *call); extern unsigned long efi_call_virt_save_flags(void); -enum efi_secureboot_mode { - efi_secureboot_mode_unset, - efi_secureboot_mode_unknown, - efi_secureboot_mode_disabled, - efi_secureboot_mode_enabled, -}; - static inline enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) { diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2adeea44c0d5..517013ece679 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -394,6 +394,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) +LSM_HOOK(int, 0, lock_kernel_down, const char *where, enum lockdown_reason level) + #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, 
perf_event_open, struct perf_event_attr *attr, int type) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..753b53038690 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1545,6 +1545,12 @@ * * @what: kernel feature being accessed * + * @lock_kernel_down + * Put the kernel into lock-down mode. + * + * @where: Where the lock-down is originating from (e.g. command line option) + * @level: The lock-down level (can only increase) + * * Security hooks for perf events * * @perf_event_open: diff --git a/include/linux/random.h b/include/linux/random.h index f45b8be3e3c4..7ccdec68b789 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -14,6 +14,11 @@ #include +struct random_extrng { + ssize_t (*extrng_read)(void __user *buf, size_t buflen); + struct module *owner; +}; + struct random_ready_callback { struct list_head list; void (*func)(struct random_ready_callback *rdy); @@ -44,6 +49,8 @@ extern bool rng_is_initialized(void); extern int add_random_ready_callback(struct random_ready_callback *rdy); extern void del_random_ready_callback(struct random_ready_callback *rdy); extern int __must_check get_random_bytes_arch(void *buf, int nbytes); +void random_register_extrng(const struct random_extrng *rng); +void random_unregister_extrng(void); #ifndef MODULE extern const struct file_operations random_fops, urandom_fops; diff --git a/include/linux/rmi.h b/include/linux/rmi.h index ab7eea01ab42..fff7c5f737fc 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -364,6 +364,7 @@ struct rmi_driver_data { struct rmi4_attn_data attn_data; DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16); + struct work_struct attn_work; }; int rmi_register_transport_device(struct rmi_transport_dev *xport); diff --git a/include/linux/security.h b/include/linux/security.h index 5b7288521300..f966f591c91e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -472,6 +472,7 @@ int 
security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int security_lock_kernel_down(const char *where, enum lockdown_reason level); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1348,6 +1349,10 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } +static inline int security_lock_kernel_down(const char *where, enum lockdown_reason level) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) diff --git a/init/Kconfig b/init/Kconfig index 55f9f7738ebb..564553afb251 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1625,7 +1625,7 @@ config AIO this option saves about 7k. config IO_URING - bool "Enable IO uring support" if EXPERT + bool "Enable IO uring support" select IO_WQ default y help diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 8723ae70ea1f..fb2d773498c2 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -38,8 +38,15 @@ int mod_verify_sig(const void *mod, struct load_info *info) modlen -= sig_len + sizeof(ms); info->len = modlen; - return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, VERIFY_USE_SECONDARY_KEYRING, VERIFYING_MODULE_SIGNATURE, NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + } + return ret; } diff --git a/mm/cma.c b/mm/cma.c index 995e15480937..588f7e7885cf 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -125,6 +125,12 @@ static void __init cma_activate_area(struct cma *cma) 
spin_lock_init(&cma->mem_head_lock); #endif +#ifdef CONFIG_RHEL_DIFFERENCES + /* s390x and ppc64 has been using CMA already in RHEL 8 as default. */ + if (!IS_ENABLED(CONFIG_S390) && !IS_ENABLED(CONFIG_PPC64)) + mark_tech_preview("CMA", NULL); +#endif /* CONFIG_RHEL_DIFFERENCES */ + return; not_in_zone: @@ -437,6 +443,10 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, if (!cma || !cma->count || !cma->bitmap) goto out; +#ifdef CONFIG_RHEL_DIFFERENCES + pr_info_once("Initial CMA usage detected\n"); +#endif /* CONFIG_RHEL_DIFFERENCES */ + pr_debug("%s(cma %p, count %lu, align %d)\n", __func__, (void *)cma, count, align); diff --git a/security/integrity/platform_certs/load_uefi.c b/security/integrity/platform_certs/load_uefi.c index f290f78c3f30..d3e7ae04f5be 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -46,7 +46,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, return NULL; if (*status != EFI_BUFFER_TOO_SMALL) { - pr_err("Couldn't get size: 0x%lx\n", *status); + pr_err("Couldn't get size: %s (0x%lx)\n", + efi_status_to_str(*status), *status); return NULL; } @@ -57,7 +58,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, *status = efi.get_variable(name, guid, NULL, &lsize, db); if (*status != EFI_SUCCESS) { kfree(db); - pr_err("Error reading db var: 0x%lx\n", *status); + pr_err("Error reading db var: %s (0x%lx)\n", + efi_status_to_str(*status), *status); return NULL; } diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig index e84ddf484010..d0501353a4b9 100644 --- a/security/lockdown/Kconfig +++ b/security/lockdown/Kconfig @@ -16,6 +16,19 @@ config SECURITY_LOCKDOWN_LSM_EARLY subsystem is fully initialised. If enabled, lockdown will unconditionally be called before any other LSMs. 
+config LOCK_DOWN_IN_EFI_SECURE_BOOT + bool "Lock down the kernel in EFI Secure Boot mode" + default n + depends on EFI && SECURITY_LOCKDOWN_LSM_EARLY + help + UEFI Secure Boot provides a mechanism for ensuring that the firmware + will only load signed bootloaders and kernels. Secure boot mode may + be determined from EFI variables provided by the system firmware if + not indicated by the boot parameters. + + Enabling this option results in kernel lockdown being triggered if + EFI Secure Boot is set. + choice prompt "Kernel default lockdown mode" default LOCK_DOWN_KERNEL_FORCE_NONE diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 87cbdc64d272..18555cf18da7 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -73,6 +73,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), + LSM_HOOK_INIT(lock_kernel_down, lock_kernel_down), }; static int __init lockdown_lsm_init(void) diff --git a/security/security.c b/security/security.c index 9ffa9e9c5c55..36484cc9842d 100644 --- a/security/security.c +++ b/security/security.c @@ -2599,6 +2599,12 @@ int security_locked_down(enum lockdown_reason what) } EXPORT_SYMBOL(security_locked_down); +int security_lock_kernel_down(const char *where, enum lockdown_reason level) +{ + return call_int_hook(lock_kernel_down, 0, where, level); +} +EXPORT_SYMBOL(security_lock_kernel_down); + #ifdef CONFIG_PERF_EVENTS int security_perf_event_open(struct perf_event_attr *attr, int type) { diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 93f1f124ef89..70905889a07b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -425,7 +425,6 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ $(TRUNNER_BPF_LSKELS) \ - 
$(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c deleted file mode 100644 index e9916f2817ec..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_funcs.skel.h" - -void test_linked_funcs(void) -{ - int err; - struct linked_funcs *skel; - - skel = linked_funcs__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - skel->rodata->my_tid = syscall(SYS_gettid); - skel->bss->syscall_id = SYS_getpgid; - - err = linked_funcs__load(skel); - if (!ASSERT_OK(err, "skel_load")) - goto cleanup; - - err = linked_funcs__attach(skel); - if (!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1"); - ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1"); - ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1"); - - ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2"); - ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2"); - /* output_weak2 should never be updated */ - ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2"); - -cleanup: - linked_funcs__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c deleted file mode 100644 index 85dcaaaf2775..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_maps.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_maps.skel.h" - -void test_linked_maps(void) -{ - int err; - struct linked_maps *skel; 
- - skel = linked_maps__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - err = linked_maps__attach(skel); - if (!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1"); - ASSERT_EQ(skel->bss->output_second1, 2, "output_second1"); - ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1"); - -cleanup: - linked_maps__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c deleted file mode 100644 index 267166abe4c1..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_vars.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_vars.skel.h" - -void test_linked_vars(void) -{ - int err; - struct linked_vars *skel; - - skel = linked_vars__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - skel->bss->input_bss1 = 1000; - skel->bss->input_bss2 = 2000; - skel->bss->input_bss_weak = 3000; - - err = linked_vars__load(skel); - if (!ASSERT_OK(err, "skel_load")) - goto cleanup; - - err = linked_vars__attach(skel); - if (!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1"); - ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2"); - /* 10 comes from "winner" input_data_weak in first obj file */ - ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1"); - ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2"); - /* 100 comes from "winner" input_rodata_weak in first obj file */ - ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1"); - ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2"); - -cleanup: - linked_vars__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c 
b/tools/testing/selftests/bpf/progs/bpf_cubic.c deleted file mode 100644 index f62df4d023f9..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ /dev/null @@ -1,545 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -/* WARNING: This implemenation is not necessarily the same - * as the tcp_cubic.c. The purpose is mainly for testing - * the kernel BPF logic. - * - * Highlights: - * 1. CONFIG_HZ .kconfig map is used. - * 2. In bictcp_update(), calculation is changed to use usec - * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies. - * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c. - * 3. In bitctcp_update() [under tcp_friendliness], the original - * "while (ca->ack_cnt > delta)" loop is changed to the equivalent - * "ca->ack_cnt / delta" operation. - */ - -#include -#include -#include -#include "bpf_tcp_helpers.h" - -char _license[] SEC("license") = "GPL"; - -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) - -#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation - * max_cwnd = snd_cwnd * beta - */ -#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ - -/* Two methods of hybrid slow start */ -#define HYSTART_ACK_TRAIN 0x1 -#define HYSTART_DELAY 0x2 - -/* Number of delay samples for detecting the increase of delay */ -#define HYSTART_MIN_SAMPLES 8 -#define HYSTART_DELAY_MIN (4000U) /* 4ms */ -#define HYSTART_DELAY_MAX (16000U) /* 16 ms */ -#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) - -static int fast_convergence = 1; -static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh; -static const int bic_scale = 41; -static int tcp_friendliness = 1; - -static int hystart = 1; -static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY; -static int hystart_low_window = 16; -static int hystart_ack_delta_us = 2000; - -static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ -static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 
- / (BICTCP_BETA_SCALE - beta); -/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 - * so K = cubic_root( (wmax-cwnd)*rtt/c ) - * the unit of K is bictcp_HZ=2^10, not HZ - * - * c = bic_scale >> 10 - * rtt = 100ms - * - * the following code has been designed and tested for - * cwnd < 1 million packets - * RTT < 100 seconds - * HZ < 1,000,00 (corresponding to 10 nano-second) - */ - -/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */ -static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) - / (bic_scale * 10); - -/* BIC TCP Parameters */ -struct bictcp { - __u32 cnt; /* increase cwnd by 1 after ACKs */ - __u32 last_max_cwnd; /* last maximum snd_cwnd */ - __u32 last_cwnd; /* the last snd_cwnd */ - __u32 last_time; /* time when updated last_cwnd */ - __u32 bic_origin_point;/* origin point of bic function */ - __u32 bic_K; /* time to origin point - from the beginning of the current epoch */ - __u32 delay_min; /* min delay (usec) */ - __u32 epoch_start; /* beginning of an epoch */ - __u32 ack_cnt; /* number of acks */ - __u32 tcp_cwnd; /* estimated tcp cwnd */ - __u16 unused; - __u8 sample_cnt; /* number of samples to decide curr_rtt */ - __u8 found; /* the exit point is found? 
*/ - __u32 round_start; /* beginning of each round */ - __u32 end_seq; /* end_seq of the round */ - __u32 last_ack; /* last time when the ACK spacing is close */ - __u32 curr_rtt; /* the minimum rtt of current round */ -}; - -static inline void bictcp_reset(struct bictcp *ca) -{ - ca->cnt = 0; - ca->last_max_cwnd = 0; - ca->last_cwnd = 0; - ca->last_time = 0; - ca->bic_origin_point = 0; - ca->bic_K = 0; - ca->delay_min = 0; - ca->epoch_start = 0; - ca->ack_cnt = 0; - ca->tcp_cwnd = 0; - ca->found = 0; -} - -extern unsigned long CONFIG_HZ __kconfig; -#define HZ CONFIG_HZ -#define USEC_PER_MSEC 1000UL -#define USEC_PER_SEC 1000000UL -#define USEC_PER_JIFFY (USEC_PER_SEC / HZ) - -static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) -{ - return dividend / divisor; -} - -#define div64_ul div64_u64 - -#define BITS_PER_U64 (sizeof(__u64) * 8) -static __always_inline int fls64(__u64 x) -{ - int num = BITS_PER_U64 - 1; - - if (x == 0) - return 0; - - if (!(x & (~0ull << (BITS_PER_U64-32)))) { - num -= 32; - x <<= 32; - } - if (!(x & (~0ull << (BITS_PER_U64-16)))) { - num -= 16; - x <<= 16; - } - if (!(x & (~0ull << (BITS_PER_U64-8)))) { - num -= 8; - x <<= 8; - } - if (!(x & (~0ull << (BITS_PER_U64-4)))) { - num -= 4; - x <<= 4; - } - if (!(x & (~0ull << (BITS_PER_U64-2)))) { - num -= 2; - x <<= 2; - } - if (!(x & (~0ull << (BITS_PER_U64-1)))) - num -= 1; - - return num + 1; -} - -static __always_inline __u32 bictcp_clock_us(const struct sock *sk) -{ - return tcp_sk(sk)->tcp_mstamp; -} - -static __always_inline void bictcp_hystart_reset(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - ca->round_start = ca->last_ack = bictcp_clock_us(sk); - ca->end_seq = tp->snd_nxt; - ca->curr_rtt = ~0U; - ca->sample_cnt = 0; -} - -/* "struct_ops/" prefix is not a requirement - * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS - * as long as it is used in one of the func ptr - * under SEC(".struct_ops"). 
- */ -SEC("struct_ops/bpf_cubic_init") -void BPF_PROG(bpf_cubic_init, struct sock *sk) -{ - struct bictcp *ca = inet_csk_ca(sk); - - bictcp_reset(ca); - - if (hystart) - bictcp_hystart_reset(sk); - - if (!hystart && initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; -} - -/* No prefix in SEC will also work. - * The remaining tcp-cubic functions have an easier way. - */ -SEC("no-sec-prefix-bictcp_cwnd_event") -void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) -{ - if (event == CA_EVENT_TX_START) { - struct bictcp *ca = inet_csk_ca(sk); - __u32 now = tcp_jiffies32; - __s32 delta; - - delta = now - tcp_sk(sk)->lsndtime; - - /* We were application limited (idle) for a while. - * Shift epoch_start to keep cwnd growth to cubic curve. - */ - if (ca->epoch_start && delta > 0) { - ca->epoch_start += delta; - if (after(ca->epoch_start, now)) - ca->epoch_start = now; - } - return; - } -} - -/* - * cbrt(x) MSB values for x MSB values in [0..63]. - * Precomputed then refined by hand - Willy Tarreau - * - * For x in [0..63], - * v = cbrt(x << 18) - 1 - * cbrt(x) = (v[x] + 10) >> 6 - */ -static const __u8 v[] = { - /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, - /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, - /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, - /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, - /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, - /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, - /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, - /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, -}; - -/* calculate the cubic root of x using a table lookup followed by one - * Newton-Raphson iteration. 
- * Avg err ~= 0.195% - */ -static __always_inline __u32 cubic_root(__u64 a) -{ - __u32 x, b, shift; - - if (a < 64) { - /* a in [0..63] */ - return ((__u32)v[(__u32)a] + 35) >> 6; - } - - b = fls64(a); - b = ((b * 84) >> 8) - 1; - shift = (a >> (b * 3)); - - /* it is needed for verifier's bound check on v */ - if (shift >= 64) - return 0; - - x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6; - - /* - * Newton-Raphson iteration - * 2 - * x = ( 2 * x + a / x ) / 3 - * k+1 k k - */ - x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1))); - x = ((x * 341) >> 10); - return x; -} - -/* - * Compute congestion window to use. - */ -static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, - __u32 acked) -{ - __u32 delta, bic_target, max_cnt; - __u64 offs, t; - - ca->ack_cnt += acked; /* count the number of ACKed packets */ - - if (ca->last_cwnd == cwnd && - (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) - return; - - /* The CUBIC function can update ca->cnt at most once per jiffy. - * On all cwnd reduction events, ca->epoch_start is set to 0, - * which will force a recalculation of ca->cnt. 
- */ - if (ca->epoch_start && tcp_jiffies32 == ca->last_time) - goto tcp_friendliness; - - ca->last_cwnd = cwnd; - ca->last_time = tcp_jiffies32; - - if (ca->epoch_start == 0) { - ca->epoch_start = tcp_jiffies32; /* record beginning */ - ca->ack_cnt = acked; /* start counting */ - ca->tcp_cwnd = cwnd; /* syn with cubic */ - - if (ca->last_max_cwnd <= cwnd) { - ca->bic_K = 0; - ca->bic_origin_point = cwnd; - } else { - /* Compute new K based on - * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) - */ - ca->bic_K = cubic_root(cube_factor - * (ca->last_max_cwnd - cwnd)); - ca->bic_origin_point = ca->last_max_cwnd; - } - } - - /* cubic function - calc*/ - /* calculate c * time^3 / rtt, - * while considering overflow in calculation of time^3 - * (so time^3 is done by using 64 bit) - * and without the support of division of 64bit numbers - * (so all divisions are done by using 32 bit) - * also NOTE the unit of those veriables - * time = (t - K) / 2^bictcp_HZ - * c = bic_scale >> 10 - * rtt = (srtt >> 3) / HZ - * !!! The following code does not have overflow problems, - * if the cwnd < 1 million packets !!! - */ - - t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY; - t += ca->delay_min; - /* change the unit from usec to bictcp_HZ */ - t <<= BICTCP_HZ; - t /= USEC_PER_SEC; - - if (t < ca->bic_K) /* t - K */ - offs = ca->bic_K - t; - else - offs = t - ca->bic_K; - - /* c/rtt * (t-K)^3 */ - delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); - if (t < ca->bic_K) /* below origin*/ - bic_target = ca->bic_origin_point - delta; - else /* above origin*/ - bic_target = ca->bic_origin_point + delta; - - /* cubic function - calc bictcp_cnt*/ - if (bic_target > cwnd) { - ca->cnt = cwnd / (bic_target - cwnd); - } else { - ca->cnt = 100 * cwnd; /* very small increment*/ - } - - /* - * The initial growth of cubic function may be too conservative - * when the available bandwidth is still unknown. 
- */ - if (ca->last_max_cwnd == 0 && ca->cnt > 20) - ca->cnt = 20; /* increase cwnd 5% per RTT */ - -tcp_friendliness: - /* TCP Friendly */ - if (tcp_friendliness) { - __u32 scale = beta_scale; - __u32 n; - - /* update tcp cwnd */ - delta = (cwnd * scale) >> 3; - if (ca->ack_cnt > delta && delta) { - n = ca->ack_cnt / delta; - ca->ack_cnt -= n * delta; - ca->tcp_cwnd += n; - } - - if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ - delta = ca->tcp_cwnd - cwnd; - max_cnt = cwnd / delta; - if (ca->cnt > max_cnt) - ca->cnt = max_cnt; - } - } - - /* The maximum rate of cwnd increase CUBIC allows is 1 packet per - * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. - */ - ca->cnt = max(ca->cnt, 2U); -} - -/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ -void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - if (!tcp_is_cwnd_limited(sk)) - return; - - if (tcp_in_slow_start(tp)) { - if (hystart && after(ack, ca->end_seq)) - bictcp_hystart_reset(sk); - acked = tcp_slow_start(tp, acked); - if (!acked) - return; - } - bictcp_update(ca, tp->snd_cwnd, acked); - tcp_cong_avoid_ai(tp, ca->cnt, acked); -} - -__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - ca->epoch_start = 0; /* end of epoch */ - - /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) - / (2 * BICTCP_BETA_SCALE); - else - ca->last_max_cwnd = tp->snd_cwnd; - - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); -} - -void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) -{ - if (new_state == TCP_CA_Loss) { - bictcp_reset(inet_csk_ca(sk)); - bictcp_hystart_reset(sk); - } -} - -#define GSO_MAX_SIZE 65536 - -/* Account for TSO/GRO delays. 
- * Otherwise short RTT flows could get too small ssthresh, since during - * slow start we begin with small TSO packets and ca->delay_min would - * not account for long aggregation delay when TSO packets get bigger. - * Ideally even with a very small RTT we would like to have at least one - * TSO packet being sent and received by GRO, and another one in qdisc layer. - * We apply another 100% factor because @rate is doubled at this point. - * We cap the cushion to 1ms. - */ -static __always_inline __u32 hystart_ack_delay(struct sock *sk) -{ - unsigned long rate; - - rate = sk->sk_pacing_rate; - if (!rate) - return 0; - return min((__u64)USEC_PER_MSEC, - div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); -} - -static __always_inline void hystart_update(struct sock *sk, __u32 delay) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - __u32 threshold; - - if (hystart_detect & HYSTART_ACK_TRAIN) { - __u32 now = bictcp_clock_us(sk); - - /* first detection parameter - ack-train detection */ - if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) { - ca->last_ack = now; - - threshold = ca->delay_min + hystart_ack_delay(sk); - - /* Hystart ack train triggers if we get ack past - * ca->delay_min/2. - * Pacing might have delayed packets up to RTT/2 - * during slow start. 
- */ - if (sk->sk_pacing_status == SK_PACING_NONE) - threshold >>= 1; - - if ((__s32)(now - ca->round_start) > threshold) { - ca->found = 1; - tp->snd_ssthresh = tp->snd_cwnd; - } - } - } - - if (hystart_detect & HYSTART_DELAY) { - /* obtain the minimum delay of more than sampling packets */ - if (ca->curr_rtt > delay) - ca->curr_rtt = delay; - if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { - ca->sample_cnt++; - } else { - if (ca->curr_rtt > ca->delay_min + - HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { - ca->found = 1; - tp->snd_ssthresh = tp->snd_cwnd; - } - } - } -} - -void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, - const struct ack_sample *sample) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - __u32 delay; - - /* Some calls are for duplicates without timetamps */ - if (sample->rtt_us < 0) - return; - - /* Discard delay samples right after fast recovery */ - if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ) - return; - - delay = sample->rtt_us; - if (delay == 0) - delay = 1; - - /* first time call or link delay decreases */ - if (ca->delay_min == 0 || ca->delay_min > delay) - ca->delay_min = delay; - - /* hystart triggers when cwnd is larger than some threshold */ - if (!ca->found && tcp_in_slow_start(tp) && hystart && - tp->snd_cwnd >= hystart_low_window) - hystart_update(sk, delay); -} - -extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; - -__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk) -{ - return tcp_reno_undo_cwnd(sk); -} - -SEC(".struct_ops") -struct tcp_congestion_ops cubic = { - .init = (void *)bpf_cubic_init, - .ssthresh = (void *)bpf_cubic_recalc_ssthresh, - .cong_avoid = (void *)bpf_cubic_cong_avoid, - .set_state = (void *)bpf_cubic_state, - .undo_cwnd = (void *)bpf_cubic_undo_cwnd, - .cwnd_event = (void *)bpf_cubic_cwnd_event, - .pkts_acked = (void *)bpf_cubic_acked, - .name = "bpf_cubic", -}; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c 
b/tools/testing/selftests/bpf/progs/bpf_dctcp.c deleted file mode 100644 index fd42247da8b4..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -/* WARNING: This implemenation is not necessarily the same - * as the tcp_dctcp.c. The purpose is mainly for testing - * the kernel BPF logic. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "bpf_tcp_helpers.h" - -char _license[] SEC("license") = "GPL"; - -int stg_result = 0; - -struct { - __uint(type, BPF_MAP_TYPE_SK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, int); -} sk_stg_map SEC(".maps"); - -#define DCTCP_MAX_ALPHA 1024U - -struct dctcp { - __u32 old_delivered; - __u32 old_delivered_ce; - __u32 prior_rcv_nxt; - __u32 dctcp_alpha; - __u32 next_seq; - __u32 ce_state; - __u32 loss_cwnd; -}; - -static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ -static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; - -static __always_inline void dctcp_reset(const struct tcp_sock *tp, - struct dctcp *ca) -{ - ca->next_seq = tp->snd_nxt; - - ca->old_delivered = tp->delivered; - ca->old_delivered_ce = tp->delivered_ce; -} - -SEC("struct_ops/dctcp_init") -void BPF_PROG(dctcp_init, struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); - int *stg; - - ca->prior_rcv_nxt = tp->rcv_nxt; - ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); - ca->loss_cwnd = 0; - ca->ce_state = 0; - - stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0); - if (stg) { - stg_result = *stg; - bpf_sk_storage_delete(&sk_stg_map, (void *)tp); - } - dctcp_reset(tp, ca); -} - -SEC("struct_ops/dctcp_ssthresh") -__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) -{ - struct dctcp *ca = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); - - ca->loss_cwnd = tp->snd_cwnd; - return max(tp->snd_cwnd - 
((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); -} - -SEC("struct_ops/dctcp_update_alpha") -void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); - - /* Expired RTT */ - if (!before(tp->snd_una, ca->next_seq)) { - __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; - __u32 alpha = ca->dctcp_alpha; - - /* alpha = (1 - g) * alpha + g * F */ - - alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (delivered_ce) { - __u32 delivered = tp->delivered - ca->old_delivered; - - /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 M packets. - */ - delivered_ce <<= (10 - dctcp_shift_g); - delivered_ce /= max(1U, delivered); - - alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); - } - ca->dctcp_alpha = alpha; - dctcp_reset(tp, ca); - } -} - -static __always_inline void dctcp_react_to_loss(struct sock *sk) -{ - struct dctcp *ca = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); - - ca->loss_cwnd = tp->snd_cwnd; - tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); -} - -SEC("struct_ops/dctcp_state") -void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) -{ - if (new_state == TCP_CA_Recovery && - new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) - dctcp_react_to_loss(sk); - /* We handle RTO in dctcp_cwnd_event to ensure that we perform only - * one loss-adjustment per RTT. - */ -} - -static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (ce_state == 1) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - else - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; -} - -/* Minimal DCTP CE state machine: - * - * S: 0 <- last pkt was non-CE - * 1 <- last pkt was CE - */ -static __always_inline -void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, - __u32 *prior_rcv_nxt, __u32 *ce_state) -{ - __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 
1 : 0; - - if (*ce_state != new_ce_state) { - /* CE state has changed, force an immediate ACK to - * reflect the new CE state. If an ACK was delayed, - * send that first to reflect the prior CE state. - */ - if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - dctcp_ece_ack_cwr(sk, *ce_state); - bpf_tcp_send_ack(sk, *prior_rcv_nxt); - } - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; - } - *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt; - *ce_state = new_ce_state; - dctcp_ece_ack_cwr(sk, new_ce_state); -} - -SEC("struct_ops/dctcp_cwnd_event") -void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) -{ - struct dctcp *ca = inet_csk_ca(sk); - - switch (ev) { - case CA_EVENT_ECN_IS_CE: - case CA_EVENT_ECN_NO_CE: - dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); - break; - case CA_EVENT_LOSS: - dctcp_react_to_loss(sk); - break; - default: - /* Don't care for the rest. */ - break; - } -} - -SEC("struct_ops/dctcp_cwnd_undo") -__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) -{ - const struct dctcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - -extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; - -SEC("struct_ops/dctcp_reno_cong_avoid") -void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) -{ - tcp_reno_cong_avoid(sk, ack, acked); -} - -SEC(".struct_ops") -struct tcp_congestion_ops dctcp_nouse = { - .init = (void *)dctcp_init, - .set_state = (void *)dctcp_state, - .flags = TCP_CONG_NEEDS_ECN, - .name = "bpf_dctcp_nouse", -}; - -SEC(".struct_ops") -struct tcp_congestion_ops dctcp = { - .init = (void *)dctcp_init, - .in_ack_event = (void *)dctcp_update_alpha, - .cwnd_event = (void *)dctcp_cwnd_event, - .ssthresh = (void *)dctcp_ssthresh, - .cong_avoid = (void *)dctcp_cong_avoid, - .undo_cwnd = (void *)dctcp_cwnd_undo, - .set_state = (void *)dctcp_state, - .flags = TCP_CONG_NEEDS_ECN, - .name = "bpf_dctcp", -}; diff --git 
a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c deleted file mode 100644 index 470f8723e463..000000000000 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ -#include -#include -#include "bpf_tcp_helpers.h" - -extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; - -SEC("classifier") -int kfunc_call_test2(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - - if (!sk) - return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); -} - -SEC("classifier") -int kfunc_call_test1(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - __u64 a = 1ULL << 32; - __u32 ret; - - if (!sk) - return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4); - ret = a >> 32; /* ret should be 2 */ - ret += (__u32)a; /* ret should be 12 */ - - return ret; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c deleted file mode 100644 index b2dcb7d9cb03..000000000000 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ -#include -#include -#include "bpf_tcp_helpers.h" - -extern const int bpf_prog_active __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; -int active_res = -1; -int sk_state = -1; - -int __noinline f1(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - int *active; - - if (!sk) - 
return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, - bpf_get_smp_processor_id()); - if (active) - active_res = *active; - - sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; - - return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); -} - -SEC("classifier") -int kfunc_call_test1(struct __sk_buff *skb) -{ - return f1(skb); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c deleted file mode 100644 index b964ec1390c2..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* weak and shared between two files */ -const volatile int my_tid __weak; -long syscall_id __weak; - -int output_val1; -int output_ctx1; -int output_weak1; - -/* same "subprog" name in all files, but it's ok because they all are static */ -static __noinline int subprog(int x) -{ - /* but different formula */ - return x * 1; -} - -/* Global functions can't be void */ -int set_output_val1(int x) -{ - output_val1 = x + subprog(x); - return x; -} - -/* This function can't be verified as global, as it assumes raw_tp/sys_enter - * context and accesses syscall id (second argument). So we mark it as - * __hidden, so that libbpf will mark it as static in the final object file, - * right before verifying it in the kernel. - * - * But we don't mark it as __hidden here, rather at extern site. __hidden is - * "contaminating" visibility, so it will get propagated from either extern or - * actual definition (including from the losing __weak definition). 
- */ -void set_output_ctx1(__u64 *ctx) -{ - output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */ -} - -/* this weak instance should win because it's the first one */ -__weak int set_output_weak(int x) -{ - output_weak1 = x; - return x; -} - -extern int set_output_val2(int x); - -/* here we'll force set_output_ctx2() to be __hidden in the final obj file */ -__hidden extern void set_output_ctx2(__u64 *ctx); - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler1, struct pt_regs *regs, long id) -{ - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) - return 0; - - set_output_val2(1000); - set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ - - /* keep input value the same across both files to avoid dependency on - * handler call order; differentiate by output_weak1 vs output_weak2. - */ - set_output_weak(42); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c deleted file mode 100644 index 575e958e60b7..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* weak and shared between both files */ -const volatile int my_tid __weak; -long syscall_id __weak; - -int output_val2; -int output_ctx2; -int output_weak2; /* should stay zero */ - -/* same "subprog" name in all files, but it's ok because they all are static */ -static __noinline int subprog(int x) -{ - /* but different formula */ - return x * 2; -} - -/* Global functions can't be void */ -int set_output_val2(int x) -{ - output_val2 = 2 * x + 2 * subprog(x); - return 2 * x; -} - -/* This function can't be verified as global, as it assumes raw_tp/sys_enter - * context and accesses syscall id (second argument). 
So we mark it as - * __hidden, so that libbpf will mark it as static in the final object file, - * right before verifying it in the kernel. - * - * But we don't mark it as __hidden here, rather at extern site. __hidden is - * "contaminating" visibility, so it will get propagated from either extern or - * actual definition (including from the losing __weak definition). - */ -void set_output_ctx2(__u64 *ctx) -{ - output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */ -} - -/* this weak instance should lose, because it will be processed second */ -__weak int set_output_weak(int x) -{ - output_weak2 = x; - return 2 * x; -} - -extern int set_output_val1(int x); - -/* here we'll force set_output_ctx1() to be __hidden in the final obj file */ -__hidden extern void set_output_ctx1(__u64 *ctx); - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler2, struct pt_regs *regs, long id) -{ - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) - return 0; - - set_output_val1(2000); - set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ - - /* keep input value the same across both files to avoid dependency on - * handler call order; differentiate by output_weak1 vs output_weak2. 
- */ - set_output_weak(42); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c deleted file mode 100644 index 0693687474ed..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_maps2.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* modifiers and typedefs are ignored when comparing key/value types */ -typedef struct my_key { long x; } key_type; -typedef struct my_value { long x; } value_type; - -extern struct { - __uint(max_entries, 16); - __type(key, key_type); - __type(value, value_type); - __uint(type, BPF_MAP_TYPE_HASH); -} map1 SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, int); - __uint(max_entries, 8); -} map2 SEC(".maps"); - -/* this definition will lose, but it has to exactly match the winner */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, int); - __uint(max_entries, 16); -} map_weak __weak SEC(".maps"); - -int output_first2; -int output_second2; -int output_weak2; - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler_enter2) -{ - /* update values with key = 2 */ - int key = 2, val = 2; - key_type key_struct = { .x = 2 }; - value_type val_struct = { .x = 2000 }; - - bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); - bpf_map_update_elem(&map2, &key, &val, 0); - bpf_map_update_elem(&map_weak, &key, &val, 0); - - return 0; -} - -SEC("raw_tp/sys_exit") -int BPF_PROG(handler_exit2) -{ - /* lookup values with key = 1, set in another file */ - int key = 1, *val; - key_type key_struct = { .x = 1 }; - value_type *value_struct; - - value_struct = bpf_map_lookup_elem(&map1, &key_struct); - if (value_struct) - output_first2 = value_struct->x; - - val = bpf_map_lookup_elem(&map2, &key); - if (val) - output_second2 = *val; - - val = 
bpf_map_lookup_elem(&map_weak, &key); - if (val) - output_weak2 = *val; - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c deleted file mode 100644 index ef9e9d0bb0ca..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_vars1.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -extern int LINUX_KERNEL_VERSION __kconfig; -/* this weak extern will be strict due to the other file's strong extern */ -extern bool CONFIG_BPF_SYSCALL __kconfig __weak; -extern const void bpf_link_fops __ksym __weak; - -int input_bss1; -int input_data1 = 1; -const volatile int input_rodata1 = 11; - -int input_bss_weak __weak; -/* these two definitions should win */ -int input_data_weak __weak = 10; -const volatile int input_rodata_weak __weak = 100; - -extern int input_bss2; -extern int input_data2; -extern const int input_rodata2; - -int output_bss1; -int output_data1; -int output_rodata1; - -long output_sink1; - -static __noinline int get_bss_res(void) -{ - /* just make sure all the relocations work against .text as well */ - return input_bss1 + input_bss2 + input_bss_weak; -} - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler1) -{ - output_bss1 = get_bss_res(); - output_data1 = input_data1 + input_data2 + input_data_weak; - output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak; - - /* make sure we actually use above special externs, otherwise compiler - * will optimize them out - */ - output_sink1 = LINUX_KERNEL_VERSION - + CONFIG_BPF_SYSCALL - + (long)&bpf_link_fops; - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c deleted file mode 100644 index e4f5bd388a3c..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_vars2.c +++ 
/dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -extern int LINUX_KERNEL_VERSION __kconfig; -/* when an extern is defined as both strong and weak, resulting symbol will be strong */ -extern bool CONFIG_BPF_SYSCALL __kconfig; -extern const void __start_BTF __ksym; - -int input_bss2; -int input_data2 = 2; -const volatile int input_rodata2 = 22; - -int input_bss_weak __weak; -/* these two weak variables should lose */ -int input_data_weak __weak = 20; -const volatile int input_rodata_weak __weak = 200; - -extern int input_bss1; -extern int input_data1; -extern const int input_rodata1; - -int output_bss2; -int output_data2; -int output_rodata2; - -int output_sink2; - -static __noinline int get_data_res(void) -{ - /* just make sure all the relocations work against .text as well */ - return input_data1 + input_data2 + input_data_weak; -} - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler2) -{ - output_bss2 = input_bss1 + input_bss2 + input_bss_weak; - output_data2 = get_data_res(); - output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak; - - /* make sure we actually use above special externs, otherwise compiler - * will optimize them out - */ - output_sink2 = LINUX_KERNEL_VERSION - + CONFIG_BPF_SYSCALL - + (long)&__start_BTF; - - return 0; -} - -char LICENSE[] SEC("license") = "GPL";