Documentation/admin-guide/kernel-parameters.txt | 9 + Kconfig | 2 + Kconfig.redhat | 17 + Makefile | 12 +- arch/arm/Kconfig | 4 +- arch/arm64/Kconfig | 3 +- arch/arm64/kernel/acpi.c | 4 + arch/s390/include/asm/ipl.h | 1 + arch/s390/kernel/ipl.c | 5 + arch/s390/kernel/setup.c | 4 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/setup.c | 69 ++- drivers/acpi/apei/hest.c | 8 + drivers/acpi/irq.c | 17 +- drivers/acpi/scan.c | 9 + drivers/ata/libahci.c | 18 + drivers/char/ipmi/ipmi_dmi.c | 15 + drivers/char/ipmi/ipmi_msghandler.c | 16 +- drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/efi.c | 124 +++-- drivers/firmware/efi/secureboot.c | 38 ++ drivers/hid/hid-rmi.c | 64 --- drivers/hwtracing/coresight/coresight-etm4x-core.c | 19 + drivers/infiniband/sw/rxe/rxe.c | 2 + drivers/input/rmi4/rmi_driver.c | 124 +++-- drivers/iommu/iommu.c | 22 + drivers/message/fusion/mptsas.c | 10 + drivers/message/fusion/mptspi.c | 11 + drivers/net/team/team.c | 2 + drivers/net/wireguard/main.c | 7 + drivers/nvme/host/core.c | 22 +- drivers/nvme/host/multipath.c | 19 +- drivers/nvme/host/nvme.h | 4 + drivers/pci/pci-driver.c | 29 ++ drivers/pci/quirks.c | 24 + drivers/pinctrl/intel/pinctrl-tigerlake.c | 26 +- drivers/scsi/aacraid/linit.c | 2 + drivers/scsi/be2iscsi/be_main.c | 2 + drivers/scsi/hpsa.c | 4 + drivers/scsi/lpfc/lpfc_ids.h | 14 + drivers/scsi/megaraid/megaraid_sas_base.c | 2 + drivers/scsi/mpt3sas/mpt3sas_scsih.c | 4 + drivers/scsi/qla2xxx/qla_os.c | 6 + drivers/scsi/qla4xxx/ql4_os.c | 2 + drivers/usb/core/hub.c | 7 + include/linux/efi.h | 22 +- include/linux/kernel.h | 15 + include/linux/lsm_hook_defs.h | 2 + include/linux/lsm_hooks.h | 6 + include/linux/module.h | 1 + include/linux/panic.h | 19 +- include/linux/pci.h | 4 + include/linux/rh_kabi.h | 297 +++++++++++ include/linux/rmi.h | 1 + include/linux/security.h | 5 + init/Kconfig | 2 +- kernel/Makefile | 1 + kernel/bpf/syscall.c | 18 + kernel/module.c | 2 + kernel/module_signing.c | 9 +- kernel/panic.c | 14 + 
kernel/rh_taint.c | 93 ++++ kernel/sysctl.c | 5 + mm/cma.c | 10 + scripts/mod/modpost.c | 8 + scripts/tags.sh | 2 + security/integrity/platform_certs/load_uefi.c | 6 +- security/lockdown/Kconfig | 13 + security/lockdown/lockdown.c | 1 + security/security.c | 6 + tools/testing/selftests/bpf/Makefile | 1 - .../selftests/bpf/prog_tests/linked_funcs.c | 42 -- .../testing/selftests/bpf/prog_tests/linked_maps.c | 30 -- .../testing/selftests/bpf/prog_tests/linked_vars.c | 43 -- tools/testing/selftests/bpf/progs/bpf_cubic.c | 545 --------------------- tools/testing/selftests/bpf/progs/bpf_dctcp.c | 224 --------- .../testing/selftests/bpf/progs/kfunc_call_test.c | 47 -- .../selftests/bpf/progs/kfunc_call_test_subprog.c | 42 -- tools/testing/selftests/bpf/progs/linked_funcs1.c | 73 --- tools/testing/selftests/bpf/progs/linked_funcs2.c | 73 --- tools/testing/selftests/bpf/progs/linked_maps2.c | 76 --- tools/testing/selftests/bpf/progs/linked_vars1.c | 54 -- tools/testing/selftests/bpf/progs/linked_vars2.c | 55 --- 83 files changed, 1167 insertions(+), 1505 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdb22006f713..61a3a4a4730b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5854,6 +5854,15 @@ unknown_nmi_panic [X86] Cause panic on unknown NMI. + unprivileged_bpf_disabled= + Format: { "0" | "1" | "2" } + Sets the initial value of + kernel.unprivileged_bpf_disabled sysctl knob. + 0 - unprivileged bpf() syscall access is enabled. + 1 - unprivileged bpf() syscall access is disabled permanently. + 2 - unprivileged bpf() syscall access is disabled. + Default value is 2. 
+ usbcore.authorized_default= [USB] Default USB device authorization: (default -1 = authorized except for wireless USB, diff --git a/Kconfig b/Kconfig index 745bc773f567..f57ff40109d7 100644 --- a/Kconfig +++ b/Kconfig @@ -30,3 +30,5 @@ source "lib/Kconfig" source "lib/Kconfig.debug" source "Documentation/Kconfig" + +source "Kconfig.redhat" diff --git a/Kconfig.redhat b/Kconfig.redhat new file mode 100644 index 000000000000..effb81d04bfd --- /dev/null +++ b/Kconfig.redhat @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Red Hat specific options +# + +menu "Red Hat options" + +config RHEL_DIFFERENCES + bool "Remove support for deprecated features" + help + Red Hat may choose to deprecate certain features in its kernels. + Enable this option to remove support for hardware that is no + longer supported. + + Unless you want a restricted kernel, say N here. + +endmenu diff --git a/Makefile b/Makefile index eae1314a5b86..a8dcae6b5b85 100644 --- a/Makefile +++ b/Makefile @@ -18,6 +18,10 @@ $(if $(filter __%, $(MAKECMDGOALS)), \ PHONY := __all __all: +# Set RHEL variables +# Use this spot to avoid future merge conflicts +include Makefile.rhelver + # We are using a recursive build, so we need to do a little thinking # to get the ordering right. # @@ -1256,7 +1260,13 @@ define filechk_version.h ((c) > 255 ? 
255 : (c)))'; \ echo \#define LINUX_VERSION_MAJOR $(VERSION); \ echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL); \ - echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL) + echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL); \ + echo '#define RHEL_MAJOR $(RHEL_MAJOR)'; \ + echo '#define RHEL_MINOR $(RHEL_MINOR)'; \ + echo '#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b))'; \ + echo '#define RHEL_RELEASE_CODE \ + $(shell expr $(RHEL_MAJOR) \* 256 + $(RHEL_MINOR))'; \ + echo '#define RHEL_RELEASE "$(RHEL_RELEASE)"' endef $(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 2fb7012c3246..47718b4f2f75 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1474,9 +1474,9 @@ config HIGHMEM If unsure, say n. config HIGHPTE - bool "Allocate 2nd-level pagetables from highmem" if EXPERT + bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM - default y + default n help The VM uses one page of physical memory for each page table. 
For systems with a lot of processes, this can use a lot of diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fdcd54d39c1e..365b74034428 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -914,7 +914,7 @@ endchoice config ARM64_FORCE_52BIT bool "Force 52-bit virtual addresses for userspace" - depends on ARM64_VA_BITS_52 && EXPERT + depends on ARM64_VA_BITS_52 help For systems with 52-bit userspace VAs enabled, the kernel will attempt to maintain compatibility with older software by providing 48-bit VAs @@ -1155,6 +1155,7 @@ config XEN config FORCE_MAX_ZONEORDER int default "14" if ARM64_64K_PAGES + default "13" if (ARCH_THUNDER && !ARM64_64K_PAGES && !RHEL_DIFFERENCES) default "12" if ARM64_16K_PAGES default "11" help diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index f3851724fe35..ef69eeab6f2a 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -40,7 +40,11 @@ int acpi_pci_disabled = 1; /* skip ACPI PCI scan and IRQ initialization */ EXPORT_SYMBOL(acpi_pci_disabled); static bool param_acpi_off __initdata; +#ifdef CONFIG_RHEL_DIFFERENCES +static bool param_acpi_on __initdata = true; +#else static bool param_acpi_on __initdata; +#endif static bool param_acpi_force __initdata; static int __init parse_acpi(char *arg) diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h index a9e2c7295b35..6ff11f3a2d47 100644 --- a/arch/s390/include/asm/ipl.h +++ b/arch/s390/include/asm/ipl.h @@ -127,6 +127,7 @@ int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf, unsigned char flags, unsigned short cert); int ipl_report_add_certificate(struct ipl_report *report, void *key, unsigned long addr, unsigned long len); +bool ipl_get_secureboot(void); /* * DIAG 308 support diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 50e2c21e0ec9..5da0c683df8d 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2218,3 +2218,8 @@ int ipl_report_free(struct ipl_report 
*report) } #endif + +bool ipl_get_secureboot(void) +{ + return !!ipl_secure_flag; +} diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ff0f9e838916..557318323664 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1023,6 +1024,9 @@ void __init setup_arch(char **cmdline_p) log_component_list(); + if (ipl_get_secureboot()) + security_lock_kernel_down("Secure IPL mode", LOCKDOWN_INTEGRITY_MAX); + /* Have one command line that is parsed and saved in /proc/cmdline */ /* boot_command_line has been already set up in early.c */ *cmdline_p = boot_command_line; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 64b805bd6a54..e6702f1e1beb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1304,6 +1304,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); + get_model_name(c); /* RHEL: get model name for unsupported check */ get_cpu_address_sizes(c); setup_force_cpu_cap(X86_FEATURE_CPUID); cpu_parse_early_param(); diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bff3a784aec5..1a9c345f914d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -49,6 +50,7 @@ #include #include #include +#include /* * max_low_pfn_mapped: highest directly mapped pfn < 4 GB @@ -732,6 +734,50 @@ static void __init early_reserve_memory(void) trim_snb_memory(); } +#ifdef CONFIG_RHEL_DIFFERENCES + +static void rh_check_supported(void) +{ + bool guest; + + guest = (x86_hyper_type != X86_HYPER_NATIVE || boot_cpu_has(X86_FEATURE_HYPERVISOR)); + + /* RHEL supports single cpu on guests only */ + if (((boot_cpu_data.x86_max_cores * smp_num_siblings) == 1) && + !guest && is_kdump_kernel()) { + pr_crit("Detected single cpu native boot.\n"); + pr_crit("Important: In 
this kernel, single threaded, single CPU 64-bit physical systems are unsupported."); + } + + /* + * If the RHEL kernel does not support this hardware, the kernel will + * attempt to boot, but no support is provided for this hardware + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + case X86_VENDOR_INTEL: + break; + default: + pr_crit("Detected processor %s %s\n", + boot_cpu_data.x86_vendor_id, + boot_cpu_data.x86_model_id); + mark_hardware_unsupported("Processor"); + break; + } + + /* + * Due to the complexity of x86 lapic & ioapic enumeration, and PCI IRQ + * routing, ACPI is required for x86. acpi=off is a valid debug kernel + * parameter, so just print out a loud warning in case something + * goes wrong (which is most of the time). + */ + if (acpi_disabled && !guest) + pr_crit("ACPI has been disabled or is not available on this hardware. This may result in a single cpu boot, incorrect PCI IRQ routing, or boot failure.\n"); +} +#else +#define rh_check_supported() +#endif + /* * Dump out kernel offset information on panic. 
*/ @@ -936,6 +982,13 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled(EFI_BOOT)) efi_init(); + efi_set_secure_boot(boot_params.secure_boot); + +#ifdef CONFIG_LOCK_DOWN_IN_EFI_SECURE_BOOT + if (efi_enabled(EFI_SECURE_BOOT)) + security_lock_kernel_down("EFI Secure Boot mode", LOCKDOWN_INTEGRITY_MAX); +#endif + dmi_setup(); /* @@ -1101,19 +1154,7 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); - if (efi_enabled(EFI_BOOT)) { - switch (boot_params.secure_boot) { - case efi_secureboot_mode_disabled: - pr_info("Secure boot disabled\n"); - break; - case efi_secureboot_mode_enabled: - pr_info("Secure boot enabled\n"); - break; - default: - pr_info("Secure boot could not be determined\n"); - break; - } - } + efi_set_secure_boot(boot_params.secure_boot); reserve_initrd(); @@ -1226,6 +1267,8 @@ void __init setup_arch(char **cmdline_p) efi_apply_memmap_quirks(); #endif + rh_check_supported(); + unwind_init(); } diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index 277f00b288d1..adbce15c273d 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -94,6 +94,14 @@ int apei_hest_parse(apei_hest_func_t func, void *data) if (hest_disable || !hest_tab) return -EINVAL; +#ifdef CONFIG_ARM64 + /* Ignore broken firmware */ + if (!strncmp(hest_tab->header.oem_id, "HPE ", 6) && + !strncmp(hest_tab->header.oem_table_id, "ProLiant", 8) && + MIDR_IMPLEMENTOR(read_cpuid_id()) == ARM_CPU_IMP_APM) + return -EINVAL; +#endif + hest_hdr = (struct acpi_hest_header *)(hest_tab + 1); for (i = 0; i < hest_tab->error_source_count; i++) { len = hest_esrc_len(hest_hdr); diff --git a/drivers/acpi/irq.c b/drivers/acpi/irq.c index c68e694fca26..146cba5ae5bc 100644 --- a/drivers/acpi/irq.c +++ b/drivers/acpi/irq.c @@ -130,6 +130,7 @@ struct acpi_irq_parse_one_ctx { unsigned int index; unsigned long *res_flags; struct irq_fwspec *fwspec; + bool skip_producer_check; }; /** @@ -201,7 +202,8 @@ static acpi_status 
acpi_irq_parse_one_cb(struct acpi_resource *ares, return AE_CTRL_TERMINATE; case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: eirq = &ares->data.extended_irq; - if (eirq->producer_consumer == ACPI_PRODUCER) + if (!ctx->skip_producer_check && + eirq->producer_consumer == ACPI_PRODUCER) return AE_OK; if (ctx->index >= eirq->interrupt_count) { ctx->index -= eirq->interrupt_count; @@ -236,8 +238,19 @@ static acpi_status acpi_irq_parse_one_cb(struct acpi_resource *ares, static int acpi_irq_parse_one(acpi_handle handle, unsigned int index, struct irq_fwspec *fwspec, unsigned long *flags) { - struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec }; + struct acpi_irq_parse_one_ctx ctx = { -EINVAL, index, flags, fwspec, false }; + /* + * Firmware on arm64-based HPE m400 platform incorrectly marks + * its UART interrupt as ACPI_PRODUCER rather than ACPI_CONSUMER. + * Don't do the producer/consumer check for that device. + */ + if (IS_ENABLED(CONFIG_ARM64)) { + struct acpi_device *adev = acpi_bus_get_acpi_device(handle); + + if (adev && !strcmp(acpi_device_hid(adev), "APMC0D08")) + ctx.skip_producer_check = true; + } acpi_walk_resources(handle, METHOD_NAME__CRS, acpi_irq_parse_one_cb, &ctx); return ctx.rc; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index b24513ec3fae..8308569f66e0 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1721,6 +1721,15 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device) if (!acpi_match_device_ids(device, i2c_multi_instantiate_ids)) return false; + /* + * Firmware on some arm64 X-Gene platforms will make the UART + * device appear as both a UART and a slave of that UART. Just + * bail out here for X-Gene UARTs. 
+ */ + if (IS_ENABLED(CONFIG_ARM64) && + !strcmp(acpi_device_hid(device), "APMC0D08")) + return false; + INIT_LIST_HEAD(&resource_list); acpi_dev_get_resources(device, &resource_list, acpi_check_serial_bus_slave, diff --git a/drivers/ata/libahci.c b/drivers/ata/libahci.c index fec2e9754aed..bea4e2973259 100644 --- a/drivers/ata/libahci.c +++ b/drivers/ata/libahci.c @@ -671,6 +671,24 @@ int ahci_stop_engine(struct ata_port *ap) tmp &= ~PORT_CMD_START; writel(tmp, port_mmio + PORT_CMD); +#ifdef CONFIG_ARM64 + /* Rev Ax of Cavium CN99XX needs a hack for port stop */ + if (dev_is_pci(ap->host->dev) && + to_pci_dev(ap->host->dev)->vendor == 0x14e4 && + to_pci_dev(ap->host->dev)->device == 0x9027 && + midr_is_cpu_model_range(read_cpuid_id(), + MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN), + MIDR_CPU_VAR_REV(0, 0), + MIDR_CPU_VAR_REV(0, MIDR_REVISION_MASK))) { + tmp = readl(hpriv->mmio + 0x8000); + udelay(100); + writel(tmp | (1 << 26), hpriv->mmio + 0x8000); + udelay(100); + writel(tmp & ~(1 << 26), hpriv->mmio + 0x8000); + dev_warn(ap->host->dev, "CN99XX SATA reset workaround applied\n"); + } +#endif + /* wait for engine to stop. This could be as long as 500 msec */ tmp = ata_wait_register(ap, port_mmio + PORT_CMD, PORT_CMD_LIST_ON, PORT_CMD_LIST_ON, 1, 500); diff --git a/drivers/char/ipmi/ipmi_dmi.c b/drivers/char/ipmi/ipmi_dmi.c index bbf7029e224b..cf7faa970dd6 100644 --- a/drivers/char/ipmi/ipmi_dmi.c +++ b/drivers/char/ipmi/ipmi_dmi.c @@ -215,6 +215,21 @@ static int __init scan_for_dmi_ipmi(void) { const struct dmi_device *dev = NULL; +#ifdef CONFIG_ARM64 + /* RHEL-only + * If this is ARM-based HPE m400, return now, because that platform + * reports the host-side ipmi address as intel port-io space, which + * does not exist in the ARM architecture. 
+ */ + const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); + + if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { + pr_debug("%s does not support host ipmi\n", dmistr); + return 0; + } + /* END RHEL-only */ +#endif + while ((dev = dmi_find_device(DMI_DEV_TYPE_IPMI, NULL, dev))) dmi_decode_ipmi((const struct dmi_header *) dev->device_data); diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e96cb5c4f97a..d645460fe8a9 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #define IPMI_DRIVER_VERSION "39.2" @@ -5160,8 +5161,21 @@ static int __init ipmi_init_msghandler_mod(void) { int rv; - pr_info("version " IPMI_DRIVER_VERSION "\n"); +#ifdef CONFIG_ARM64 + /* RHEL-only + * If this is ARM-based HPE m400, return now, because that platform + * reports the host-side ipmi address as intel port-io space, which + * does not exist in the ARM architecture. 
+ */ + const char *dmistr = dmi_get_system_info(DMI_PRODUCT_NAME); + if (dmistr && (strcmp("ProLiant m400 Server", dmistr) == 0)) { + pr_debug("%s does not support host ipmi\n", dmistr); + return -ENOSYS; + } + /* END RHEL-only */ +#endif + pr_info("version " IPMI_DRIVER_VERSION "\n"); mutex_lock(&ipmi_interfaces_mutex); rv = ipmi_register_driver(); mutex_unlock(&ipmi_interfaces_mutex); diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 467e94259679..9b6f5b8e5397 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_EFI_FAKE_MEMMAP) += fake_map.o obj-$(CONFIG_EFI_BOOTLOADER_CONTROL) += efibc.o obj-$(CONFIG_EFI_TEST) += test/ obj-$(CONFIG_EFI_DEV_PATH_PARSER) += dev-path-parser.o +obj-$(CONFIG_EFI) += secureboot.o obj-$(CONFIG_APPLE_PROPERTIES) += apple-properties.o obj-$(CONFIG_EFI_RCI2_TABLE) += rci2-table.o obj-$(CONFIG_EFI_EMBEDDED_FIRMWARE) += embedded-firmware.o diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 847f33ffc4ae..363037f8eaf8 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -31,6 +31,7 @@ #include #include #include +#include #include @@ -841,40 +842,101 @@ int efi_mem_type(unsigned long phys_addr) } #endif +struct efi_error_code { + efi_status_t status; + int errno; + const char *description; +}; + +static const struct efi_error_code efi_error_codes[] = { + { EFI_SUCCESS, 0, "Success"}, +#if 0 + { EFI_LOAD_ERROR, -EPICK_AN_ERRNO, "Load Error"}, +#endif + { EFI_INVALID_PARAMETER, -EINVAL, "Invalid Parameter"}, + { EFI_UNSUPPORTED, -ENOSYS, "Unsupported"}, + { EFI_BAD_BUFFER_SIZE, -ENOSPC, "Bad Buffer Size"}, + { EFI_BUFFER_TOO_SMALL, -ENOSPC, "Buffer Too Small"}, + { EFI_NOT_READY, -EAGAIN, "Not Ready"}, + { EFI_DEVICE_ERROR, -EIO, "Device Error"}, + { EFI_WRITE_PROTECTED, -EROFS, "Write Protected"}, + { EFI_OUT_OF_RESOURCES, -ENOMEM, "Out of Resources"}, +#if 0 + { EFI_VOLUME_CORRUPTED, -EPICK_AN_ERRNO, "Volume 
Corrupt"}, + { EFI_VOLUME_FULL, -EPICK_AN_ERRNO, "Volume Full"}, + { EFI_NO_MEDIA, -EPICK_AN_ERRNO, "No Media"}, + { EFI_MEDIA_CHANGED, -EPICK_AN_ERRNO, "Media changed"}, +#endif + { EFI_NOT_FOUND, -ENOENT, "Not Found"}, +#if 0 + { EFI_ACCESS_DENIED, -EPICK_AN_ERRNO, "Access Denied"}, + { EFI_NO_RESPONSE, -EPICK_AN_ERRNO, "No Response"}, + { EFI_NO_MAPPING, -EPICK_AN_ERRNO, "No mapping"}, + { EFI_TIMEOUT, -EPICK_AN_ERRNO, "Time out"}, + { EFI_NOT_STARTED, -EPICK_AN_ERRNO, "Not started"}, + { EFI_ALREADY_STARTED, -EPICK_AN_ERRNO, "Already started"}, +#endif + { EFI_ABORTED, -EINTR, "Aborted"}, +#if 0 + { EFI_ICMP_ERROR, -EPICK_AN_ERRNO, "ICMP Error"}, + { EFI_TFTP_ERROR, -EPICK_AN_ERRNO, "TFTP Error"}, + { EFI_PROTOCOL_ERROR, -EPICK_AN_ERRNO, "Protocol Error"}, + { EFI_INCOMPATIBLE_VERSION, -EPICK_AN_ERRNO, "Incompatible Version"}, +#endif + { EFI_SECURITY_VIOLATION, -EACCES, "Security Policy Violation"}, +#if 0 + { EFI_CRC_ERROR, -EPICK_AN_ERRNO, "CRC Error"}, + { EFI_END_OF_MEDIA, -EPICK_AN_ERRNO, "End of Media"}, + { EFI_END_OF_FILE, -EPICK_AN_ERRNO, "End of File"}, + { EFI_INVALID_LANGUAGE, -EPICK_AN_ERRNO, "Invalid Languages"}, + { EFI_COMPROMISED_DATA, -EPICK_AN_ERRNO, "Compromised Data"}, + + // warnings + { EFI_WARN_UNKOWN_GLYPH, -EPICK_AN_ERRNO, "Warning Unknown Glyph"}, + { EFI_WARN_DELETE_FAILURE, -EPICK_AN_ERRNO, "Warning Delete Failure"}, + { EFI_WARN_WRITE_FAILURE, -EPICK_AN_ERRNO, "Warning Write Failure"}, + { EFI_WARN_BUFFER_TOO_SMALL, -EPICK_AN_ERRNO, "Warning Buffer Too Small"}, +#endif +}; + +static int +efi_status_cmp_bsearch(const void *key, const void *item) +{ + u64 status = (u64)(uintptr_t)key; + struct efi_error_code *code = (struct efi_error_code *)item; + + if (status < code->status) + return -1; + if (status > code->status) + return 1; + return 0; +} + int efi_status_to_err(efi_status_t status) { - int err; - - switch (status) { - case EFI_SUCCESS: - err = 0; - break; - case EFI_INVALID_PARAMETER: - err = -EINVAL; - break; - case 
EFI_OUT_OF_RESOURCES: - err = -ENOSPC; - break; - case EFI_DEVICE_ERROR: - err = -EIO; - break; - case EFI_WRITE_PROTECTED: - err = -EROFS; - break; - case EFI_SECURITY_VIOLATION: - err = -EACCES; - break; - case EFI_NOT_FOUND: - err = -ENOENT; - break; - case EFI_ABORTED: - err = -EINTR; - break; - default: - err = -EINVAL; - } + struct efi_error_code *found; + size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); - return err; + found = bsearch((void *)(uintptr_t)status, efi_error_codes, + sizeof(struct efi_error_code), num, + efi_status_cmp_bsearch); + if (!found) + return -EINVAL; + return found->errno; +} + +const char * +efi_status_to_str(efi_status_t status) +{ + struct efi_error_code *found; + size_t num = sizeof(efi_error_codes) / sizeof(struct efi_error_code); + + found = bsearch((void *)(uintptr_t)status, efi_error_codes, + sizeof(struct efi_error_code), num, + efi_status_cmp_bsearch); + if (!found) + return "Unknown error code"; + return found->description; } static DEFINE_SPINLOCK(efi_mem_reserve_persistent_lock); diff --git a/drivers/firmware/efi/secureboot.c b/drivers/firmware/efi/secureboot.c new file mode 100644 index 000000000000..de0a3714a5d4 --- /dev/null +++ b/drivers/firmware/efi/secureboot.c @@ -0,0 +1,38 @@ +/* Core kernel secure boot support. + * + * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +/* + * Decide what to do when UEFI secure boot mode is enabled. 
+ */ +void __init efi_set_secure_boot(enum efi_secureboot_mode mode) +{ + if (efi_enabled(EFI_BOOT)) { + switch (mode) { + case efi_secureboot_mode_disabled: + pr_info("Secure boot disabled\n"); + break; + case efi_secureboot_mode_enabled: + set_bit(EFI_SECURE_BOOT, &efi.flags); + pr_info("Secure boot enabled\n"); + break; + default: + pr_warn("Secure boot could not be determined (mode %u)\n", + mode); + break; + } + } +} diff --git a/drivers/hid/hid-rmi.c b/drivers/hid/hid-rmi.c index 311eee599ce9..2460c6bd46f8 100644 --- a/drivers/hid/hid-rmi.c +++ b/drivers/hid/hid-rmi.c @@ -322,19 +322,12 @@ static int rmi_input_event(struct hid_device *hdev, u8 *data, int size) { struct rmi_data *hdata = hid_get_drvdata(hdev); struct rmi_device *rmi_dev = hdata->xport.rmi_dev; - unsigned long flags; if (!(test_bit(RMI_STARTED, &hdata->flags))) return 0; - local_irq_save(flags); - rmi_set_attn_data(rmi_dev, data[1], &data[2], size - 2); - generic_handle_irq(hdata->rmi_irq); - - local_irq_restore(flags); - return 1; } @@ -591,56 +584,6 @@ static const struct rmi_transport_ops hid_rmi_ops = { .reset = rmi_hid_reset, }; -static void rmi_irq_teardown(void *data) -{ - struct rmi_data *hdata = data; - struct irq_domain *domain = hdata->domain; - - if (!domain) - return; - - irq_dispose_mapping(irq_find_mapping(domain, 0)); - - irq_domain_remove(domain); - hdata->domain = NULL; - hdata->rmi_irq = 0; -} - -static int rmi_irq_map(struct irq_domain *h, unsigned int virq, - irq_hw_number_t hw_irq_num) -{ - irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq); - - return 0; -} - -static const struct irq_domain_ops rmi_irq_ops = { - .map = rmi_irq_map, -}; - -static int rmi_setup_irq_domain(struct hid_device *hdev) -{ - struct rmi_data *hdata = hid_get_drvdata(hdev); - int ret; - - hdata->domain = irq_domain_create_linear(hdev->dev.fwnode, 1, - &rmi_irq_ops, hdata); - if (!hdata->domain) - return -ENOMEM; - - ret = devm_add_action_or_reset(&hdev->dev, &rmi_irq_teardown, 
hdata); - if (ret) - return ret; - - hdata->rmi_irq = irq_create_mapping(hdata->domain, 0); - if (hdata->rmi_irq <= 0) { - hid_err(hdev, "Can't allocate an IRQ\n"); - return hdata->rmi_irq < 0 ? hdata->rmi_irq : -ENXIO; - } - - return 0; -} - static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) { struct rmi_data *data = NULL; @@ -713,18 +656,11 @@ static int rmi_probe(struct hid_device *hdev, const struct hid_device_id *id) mutex_init(&data->page_mutex); - ret = rmi_setup_irq_domain(hdev); - if (ret) { - hid_err(hdev, "failed to allocate IRQ domain\n"); - return ret; - } - if (data->device_flags & RMI_DEVICE_HAS_PHYS_BUTTONS) rmi_hid_pdata.gpio_data.disable = true; data->xport.dev = hdev->dev.parent; data->xport.pdata = rmi_hid_pdata; - data->xport.pdata.irq = data->rmi_irq; data->xport.proto_name = "hid"; data->xport.ops = &hid_rmi_ops; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index da27cd4a3c38..5404b4c407c5 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -2071,6 +2072,16 @@ static const struct amba_id etm4_ids[] = { {}, }; +static const struct dmi_system_id broken_coresight[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HPE"), + DMI_MATCH(DMI_PRODUCT_NAME, "Apollo 70"), + }, + }, + { } /* terminating entry */ +}; + MODULE_DEVICE_TABLE(amba, etm4_ids); static struct amba_driver etm4x_amba_driver = { @@ -2104,6 +2115,11 @@ static int __init etm4x_init(void) { int ret; + if (dmi_check_system(broken_coresight)) { + pr_info("ETM4 disabled due to firmware bug\n"); + return 0; + } + ret = etm4_pm_setup(); /* etm4_pm_setup() does its own cleanup - exit on error */ @@ -2130,6 +2146,9 @@ static int __init etm4x_init(void) static void __exit etm4x_exit(void) { + if (dmi_check_system(broken_coresight)) + return; + 
amba_driver_unregister(&etm4x_amba_driver); platform_driver_unregister(&etm4_platform_driver); etm4_pm_clear(); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 8e0f9c489cab..e8c5ad07be7a 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -285,6 +285,8 @@ static int __init rxe_module_init(void) { int err; + mark_tech_preview("Soft-RoCE Transport Driver", THIS_MODULE); + err = rxe_net_init(); if (err) return err; diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index 258d5fe3d395..f7298e3dc8f3 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -182,34 +182,47 @@ void rmi_set_attn_data(struct rmi_device *rmi_dev, unsigned long irq_status, attn_data.data = fifo_data; kfifo_put(&drvdata->attn_fifo, attn_data); + + schedule_work(&drvdata->attn_work); } EXPORT_SYMBOL_GPL(rmi_set_attn_data); -static irqreturn_t rmi_irq_fn(int irq, void *dev_id) +static void attn_callback(struct work_struct *work) { - struct rmi_device *rmi_dev = dev_id; - struct rmi_driver_data *drvdata = dev_get_drvdata(&rmi_dev->dev); + struct rmi_driver_data *drvdata = container_of(work, + struct rmi_driver_data, + attn_work); struct rmi4_attn_data attn_data = {0}; int ret, count; count = kfifo_get(&drvdata->attn_fifo, &attn_data); - if (count) { - *(drvdata->irq_status) = attn_data.irq_status; - drvdata->attn_data = attn_data; - } + if (!count) + return; - ret = rmi_process_interrupt_requests(rmi_dev); + *(drvdata->irq_status) = attn_data.irq_status; + drvdata->attn_data = attn_data; + + ret = rmi_process_interrupt_requests(drvdata->rmi_dev); if (ret) - rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, + rmi_dbg(RMI_DEBUG_CORE, &drvdata->rmi_dev->dev, "Failed to process interrupt request: %d\n", ret); - if (count) { - kfree(attn_data.data); - drvdata->attn_data.data = NULL; - } + kfree(attn_data.data); + drvdata->attn_data.data = NULL; if (!kfifo_is_empty(&drvdata->attn_fifo)) - 
return rmi_irq_fn(irq, dev_id); + schedule_work(&drvdata->attn_work); +} + +static irqreturn_t rmi_irq_fn(int irq, void *dev_id) +{ + struct rmi_device *rmi_dev = dev_id; + int ret; + + ret = rmi_process_interrupt_requests(rmi_dev); + if (ret) + rmi_dbg(RMI_DEBUG_CORE, &rmi_dev->dev, + "Failed to process interrupt request: %d\n", ret); return IRQ_HANDLED; } @@ -217,7 +230,6 @@ static irqreturn_t rmi_irq_fn(int irq, void *dev_id) static int rmi_irq_init(struct rmi_device *rmi_dev) { struct rmi_device_platform_data *pdata = rmi_get_platform_data(rmi_dev); - struct rmi_driver_data *data = dev_get_drvdata(&rmi_dev->dev); int irq_flags = irq_get_trigger_type(pdata->irq); int ret; @@ -235,8 +247,6 @@ static int rmi_irq_init(struct rmi_device *rmi_dev) return ret; } - data->enabled = true; - return 0; } @@ -886,23 +896,27 @@ void rmi_enable_irq(struct rmi_device *rmi_dev, bool clear_wake) if (data->enabled) goto out; - enable_irq(irq); - data->enabled = true; - if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { - retval = disable_irq_wake(irq); - if (retval) - dev_warn(&rmi_dev->dev, - "Failed to disable irq for wake: %d\n", - retval); - } + if (irq) { + enable_irq(irq); + data->enabled = true; + if (clear_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = disable_irq_wake(irq); + if (retval) + dev_warn(&rmi_dev->dev, + "Failed to disable irq for wake: %d\n", + retval); + } - /* - * Call rmi_process_interrupt_requests() after enabling irq, - * otherwise we may lose interrupt on edge-triggered systems. - */ - irq_flags = irq_get_trigger_type(pdata->irq); - if (irq_flags & IRQ_TYPE_EDGE_BOTH) - rmi_process_interrupt_requests(rmi_dev); + /* + * Call rmi_process_interrupt_requests() after enabling irq, + * otherwise we may lose interrupt on edge-triggered systems. 
+ */ + irq_flags = irq_get_trigger_type(pdata->irq); + if (irq_flags & IRQ_TYPE_EDGE_BOTH) + rmi_process_interrupt_requests(rmi_dev); + } else { + data->enabled = true; + } out: mutex_unlock(&data->enabled_mutex); @@ -922,20 +936,22 @@ void rmi_disable_irq(struct rmi_device *rmi_dev, bool enable_wake) goto out; data->enabled = false; - disable_irq(irq); - if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { - retval = enable_irq_wake(irq); - if (retval) - dev_warn(&rmi_dev->dev, - "Failed to enable irq for wake: %d\n", - retval); - } - - /* make sure the fifo is clean */ - while (!kfifo_is_empty(&data->attn_fifo)) { - count = kfifo_get(&data->attn_fifo, &attn_data); - if (count) - kfree(attn_data.data); + if (irq) { + disable_irq(irq); + if (enable_wake && device_may_wakeup(rmi_dev->xport->dev)) { + retval = enable_irq_wake(irq); + if (retval) + dev_warn(&rmi_dev->dev, + "Failed to enable irq for wake: %d\n", + retval); + } + } else { + /* make sure the fifo is clean */ + while (!kfifo_is_empty(&data->attn_fifo)) { + count = kfifo_get(&data->attn_fifo, &attn_data); + if (count) + kfree(attn_data.data); + } } out: @@ -981,6 +997,8 @@ static int rmi_driver_remove(struct device *dev) irq_domain_remove(data->irqdomain); data->irqdomain = NULL; + cancel_work_sync(&data->attn_work); + rmi_f34_remove_sysfs(rmi_dev); rmi_free_function_list(rmi_dev); @@ -1219,9 +1237,15 @@ static int rmi_driver_probe(struct device *dev) } } - retval = rmi_irq_init(rmi_dev); - if (retval < 0) - goto err_destroy_functions; + if (pdata->irq) { + retval = rmi_irq_init(rmi_dev); + if (retval < 0) + goto err_destroy_functions; + } + + data->enabled = true; + + INIT_WORK(&data->attn_work, attn_callback); if (data->f01_container->dev.driver) { /* Driver already bound, so enable ATTN now. 
*/ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 5419c4b9f27a..3bce0190f0cd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "iommu: " fmt #include +#include #include #include #include @@ -3036,6 +3037,27 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle) } EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); +#ifdef CONFIG_ARM64 +static int __init iommu_quirks(void) +{ + const char *vendor, *name; + + vendor = dmi_get_system_info(DMI_SYS_VENDOR); + name = dmi_get_system_info(DMI_PRODUCT_NAME); + + if (vendor && + (strncmp(vendor, "GIGABYTE", 8) == 0 && name && + (strncmp(name, "R120", 4) == 0 || + strncmp(name, "R270", 4) == 0))) { + pr_warn("Gigabyte %s detected, force iommu passthrough mode", name); + iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; + } + + return 0; +} +arch_initcall(iommu_quirks); +#endif + /* * Changes the default domain of an iommu group that has *only* one device * diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c index 85285ba8e817..b8109517c4ef 100644 --- a/drivers/message/fusion/mptsas.c +++ b/drivers/message/fusion/mptsas.c @@ -5318,6 +5318,11 @@ mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id) ioc, MPI_SAS_OP_CLEAR_ALL_PERSISTENT); } +#ifdef CONFIG_RHEL_DIFFERENCES + add_taint(TAINT_SUPPORT_REMOVED, LOCKDEP_STILL_OK); + pr_warn("MPTSAS MODULE IS NOT SUPPORTED\n"); +#endif + error = scsi_add_host(sh, &ioc->pcidev->dev); if (error) { dprintk(ioc, printk(MYIOC_s_ERR_FMT @@ -5381,6 +5386,10 @@ static void mptsas_remove(struct pci_dev *pdev) } static struct pci_device_id mptsas_pci_table[] = { +#ifdef CONFIG_RHEL_DIFFERENCES + { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068, + PCI_VENDOR_ID_VMWARE, PCI_ANY_ID }, +#else { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1064, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068, @@ -5393,6 +5402,7 @@ static struct pci_device_id mptsas_pci_table[] = 
{ PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_SAS1068_820XELP, PCI_ANY_ID, PCI_ANY_ID }, +#endif {0} /* Terminating entry */ }; MODULE_DEVICE_TABLE(pci, mptsas_pci_table); diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index af0ce5611e4a..1c226920c12d 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -1238,12 +1238,17 @@ static struct spi_function_template mptspi_transport_functions = { */ static struct pci_device_id mptspi_pci_table[] = { +#ifdef CONFIG_RHEL_DIFFERENCES + { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1030, + PCI_VENDOR_ID_VMWARE, PCI_ANY_ID }, +#else { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1030, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_ATTO, MPI_MANUFACTPAGE_DEVID_53C1030, PCI_ANY_ID, PCI_ANY_ID }, { PCI_VENDOR_ID_LSI_LOGIC, MPI_MANUFACTPAGE_DEVID_53C1035, PCI_ANY_ID, PCI_ANY_ID }, +#endif {0} /* Terminating entry */ }; MODULE_DEVICE_TABLE(pci, mptspi_pci_table); @@ -1534,6 +1539,12 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id) 0, 0, 0, 0, 5); scsi_scan_host(sh); + +#ifdef CONFIG_RHEL_DIFFERENCES + add_taint(TAINT_SUPPORT_REMOVED, LOCKDEP_STILL_OK); + pr_warn("MPTSPI MODULE IS NOT SUPPORTED\n"); +#endif + return 0; out_mptspi_probe: diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index dd7917cab2b1..f6b43adb2ced 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -3045,6 +3045,8 @@ static int __init team_module_init(void) if (err) goto err_nl_init; + mark_hardware_deprecated(DRV_NAME); + return 0; err_nl_init: diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c index 75dbe77b0b4b..029ff8576f8e 100644 --- a/drivers/net/wireguard/main.c +++ b/drivers/net/wireguard/main.c @@ -12,6 +12,7 @@ #include +#include #include #include #include @@ -21,6 +22,11 @@ static int __init mod_init(void) { int ret; +#ifdef CONFIG_RHEL_DIFFERENCES + if (fips_enabled) + 
return -EOPNOTSUPP; +#endif + ret = wg_allowedips_slab_init(); if (ret < 0) goto err_allowedips; @@ -48,6 +54,7 @@ static int __init mod_init(void) pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n"); pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld . All Rights Reserved.\n"); + mark_tech_preview("WireGuard", THIS_MODULE); return 0; err_netlink: diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index dfd9dec0c1f6..de735680d279 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -261,6 +261,9 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) static blk_status_t nvme_error_status(u16 status) { + if (unlikely(status & NVME_SC_DNR)) + return BLK_STS_TARGET; + switch (status & 0x7ff) { case NVME_SC_SUCCESS: return BLK_STS_OK; @@ -320,6 +323,7 @@ enum nvme_disposition { COMPLETE, RETRY, FAILOVER, + FAILUP, }; static inline enum nvme_disposition nvme_decide_disposition(struct request *req) @@ -327,15 +331,16 @@ static inline enum nvme_disposition nvme_decide_disposition(struct request *req) if (likely(nvme_req(req)->status == 0)) return COMPLETE; - if (blk_noretry_request(req) || + if ((req->cmd_flags & (REQ_FAILFAST_DEV | REQ_FAILFAST_DRIVER)) || (nvme_req(req)->status & NVME_SC_DNR) || nvme_req(req)->retries >= nvme_max_retries) return COMPLETE; - if (req->cmd_flags & REQ_NVME_MPATH) { + if (req->cmd_flags & (REQ_NVME_MPATH | REQ_FAILFAST_TRANSPORT)) { if (nvme_is_path_error(nvme_req(req)->status) || blk_queue_dying(req->q)) - return FAILOVER; + return (req->cmd_flags & REQ_NVME_MPATH) ? 
+ FAILOVER : FAILUP; } else { if (blk_queue_dying(req->q)) return COMPLETE; @@ -357,6 +362,14 @@ static inline void nvme_end_req(struct request *req) blk_mq_end_request(req, status); } +static inline void nvme_failup_req(struct request *req) +{ + nvme_update_ana(req); + + nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; + nvme_end_req(req); +} + void nvme_complete_rq(struct request *req) { trace_nvme_complete_rq(req); @@ -375,6 +388,9 @@ void nvme_complete_rq(struct request *req) case FAILOVER: nvme_failover_req(req); return; + case FAILUP: + nvme_failup_req(req); + return; } } EXPORT_SYMBOL_GPL(nvme_complete_rq); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 3f32c5e86bfc..3f95b8d5881f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -65,14 +65,10 @@ bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags) return true; } -void nvme_failover_req(struct request *req) +void nvme_update_ana(struct request *req) { struct nvme_ns *ns = req->q->queuedata; u16 status = nvme_req(req)->status & 0x7ff; - unsigned long flags; - struct bio *bio; - - nvme_mpath_clear_current_path(ns); /* * If we got back an ANA error, we know the controller is alive but not @@ -83,6 +79,16 @@ void nvme_failover_req(struct request *req) set_bit(NVME_NS_ANA_PENDING, &ns->flags); queue_work(nvme_wq, &ns->ctrl->ana_work); } +} + +void nvme_failover_req(struct request *req) +{ + struct nvme_ns *ns = req->q->queuedata; + unsigned long flags; + struct bio *bio; + + nvme_mpath_clear_current_path(ns); + nvme_update_ana(req); spin_lock_irqsave(&ns->head->requeue_lock, flags); for (bio = req->bio; bio; bio = bio->bi_next) @@ -796,8 +802,7 @@ int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) int error = 0; /* check if multipath is enabled and we have the capability */ - if (!multipath || !ctrl->subsys || - !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) + if (!ctrl->subsys || 
!(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) return 0; if (!ctrl->max_namespaces || diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 5cd1fa3b8464..2667c617ae92 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -706,6 +706,7 @@ void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys); void nvme_mpath_start_freeze(struct nvme_subsystem *subsys); bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, int *flags); void nvme_failover_req(struct request *req); +void nvme_update_ana(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); @@ -743,6 +744,9 @@ static inline bool nvme_mpath_set_disk_name(struct nvme_ns *ns, char *disk_name, static inline void nvme_failover_req(struct request *req) { } +static inline void nvme_update_ana(struct request *req) +{ +} static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) { } diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 3a72352aa5cf..47b11f3c7fce 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "pci.h" #include "pcie/portdrv.h" @@ -281,6 +282,34 @@ static struct attribute *pci_drv_attrs[] = { }; ATTRIBUTE_GROUPS(pci_drv); +/** + * pci_hw_vendor_status - Tell if a PCI device is supported by the HW vendor + * @ids: array of PCI device id structures to search in + * @dev: the PCI device structure to match against + * + * Used by a driver to check whether this device is in its list of unsupported + * devices. Returns the matching pci_device_id structure or %NULL if there is + * no match. + * + * Reserved for Internal Red Hat use only. 
+ */ +const struct pci_device_id *pci_hw_vendor_status( + const struct pci_device_id *ids, + struct pci_dev *dev) +{ + char devinfo[64]; + const struct pci_device_id *ret = pci_match_id(ids, dev); + + if (ret) { + snprintf(devinfo, sizeof(devinfo), "%s %s", + dev_driver_string(&dev->dev), dev_name(&dev->dev)); + mark_hardware_deprecated(devinfo); + } + + return ret; +} +EXPORT_SYMBOL(pci_hw_vendor_status); + struct drv_dev_and_id { struct pci_driver *drv; struct pci_dev *dev; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6d74386eadc2..2333c1e4ae05 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4230,6 +4230,30 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9000, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9084, quirk_bridge_cavm_thrx2_pcie_root); +/* + * PCI BAR 5 is not setup correctly for the on-board AHCI controller + * on Broadcom's Vulcan processor. Added a quirk to fix BAR 5 by + * using BAR 4's resources which are populated correctly and NOT + * actually used by the AHCI controller. + */ +static void quirk_fix_vulcan_ahci_bars(struct pci_dev *dev) +{ + struct resource *r = &dev->resource[4]; + + if (!(r->flags & IORESOURCE_MEM) || (r->start == 0)) + return; + + /* Set BAR5 resource to BAR4 */ + dev->resource[5] = *r; + + /* Update BAR5 in pci config space */ + pci_write_config_dword(dev, PCI_BASE_ADDRESS_5, r->start); + + /* Clear BAR4's resource */ + memset(r, 0, sizeof(*r)); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_BROADCOM, 0x9027, quirk_fix_vulcan_ahci_bars); + /* * Intersil/Techwell TW686[4589]-based video capture cards have an empty (zero) * class code. Fix it. 
diff --git a/drivers/pinctrl/intel/pinctrl-tigerlake.c b/drivers/pinctrl/intel/pinctrl-tigerlake.c index 3e4ef2b87526..0bcd19597e4a 100644 --- a/drivers/pinctrl/intel/pinctrl-tigerlake.c +++ b/drivers/pinctrl/intel/pinctrl-tigerlake.c @@ -701,32 +701,32 @@ static const struct pinctrl_pin_desc tglh_pins[] = { static const struct intel_padgroup tglh_community0_gpps[] = { TGL_GPP(0, 0, 24, 0), /* GPP_A */ - TGL_GPP(1, 25, 44, 128), /* GPP_R */ - TGL_GPP(2, 45, 70, 32), /* GPP_B */ - TGL_GPP(3, 71, 78, INTEL_GPIO_BASE_NOMAP), /* vGPIO_0 */ + TGL_GPP(1, 25, 44, 32), /* GPP_R */ + TGL_GPP(2, 45, 70, 64), /* GPP_B */ + TGL_GPP(3, 71, 78, 96), /* vGPIO_0 */ }; static const struct intel_padgroup tglh_community1_gpps[] = { - TGL_GPP(0, 79, 104, 96), /* GPP_D */ - TGL_GPP(1, 105, 128, 64), /* GPP_C */ - TGL_GPP(2, 129, 136, 160), /* GPP_S */ - TGL_GPP(3, 137, 153, 192), /* GPP_G */ - TGL_GPP(4, 154, 180, 224), /* vGPIO */ + TGL_GPP(0, 79, 104, 128), /* GPP_D */ + TGL_GPP(1, 105, 128, 160), /* GPP_C */ + TGL_GPP(2, 129, 136, 192), /* GPP_S */ + TGL_GPP(3, 137, 153, 224), /* GPP_G */ + TGL_GPP(4, 154, 180, 256), /* vGPIO */ }; static const struct intel_padgroup tglh_community3_gpps[] = { - TGL_GPP(0, 181, 193, 256), /* GPP_E */ - TGL_GPP(1, 194, 217, 288), /* GPP_F */ + TGL_GPP(0, 181, 193, 288), /* GPP_E */ + TGL_GPP(1, 194, 217, 320), /* GPP_F */ }; static const struct intel_padgroup tglh_community4_gpps[] = { - TGL_GPP(0, 218, 241, 320), /* GPP_H */ + TGL_GPP(0, 218, 241, 352), /* GPP_H */ TGL_GPP(1, 242, 251, 384), /* GPP_J */ - TGL_GPP(2, 252, 266, 352), /* GPP_K */ + TGL_GPP(2, 252, 266, 416), /* GPP_K */ }; static const struct intel_padgroup tglh_community5_gpps[] = { - TGL_GPP(0, 267, 281, 416), /* GPP_I */ + TGL_GPP(0, 267, 281, 448), /* GPP_I */ TGL_GPP(1, 282, 290, INTEL_GPIO_BASE_NOMAP), /* JTAG */ }; diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c index 3168915adaa7..71b48e29b708 100644 --- a/drivers/scsi/aacraid/linit.c +++ 
b/drivers/scsi/aacraid/linit.c @@ -78,6 +78,7 @@ char aac_driver_version[] = AAC_DRIVER_FULL_VERSION; * Note: The last field is used to index into aac_drivers below. */ static const struct pci_device_id aac_pci_tbl[] = { +#ifndef CONFIG_RHEL_DIFFERENCES { 0x1028, 0x0001, 0x1028, 0x0001, 0, 0, 0 }, /* PERC 2/Si (Iguana/PERC2Si) */ { 0x1028, 0x0002, 0x1028, 0x0002, 0, 0, 1 }, /* PERC 3/Di (Opal/PERC3Di) */ { 0x1028, 0x0003, 0x1028, 0x0003, 0, 0, 2 }, /* PERC 3/Si (SlimFast/PERC3Si */ @@ -145,6 +146,7 @@ static const struct pci_device_id aac_pci_tbl[] = { { 0x9005, 0x0285, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 59 }, /* Adaptec Catch All */ { 0x9005, 0x0286, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 60 }, /* Adaptec Rocket Catch All */ { 0x9005, 0x0288, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 61 }, /* Adaptec NEMER/ARK Catch All */ +#endif { 0x9005, 0x028b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 62 }, /* Adaptec PMC Series 6 (Tupelo) */ { 0x9005, 0x028c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 63 }, /* Adaptec PMC Series 7 (Denali) */ { 0x9005, 0x028d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 64 }, /* Adaptec PMC Series 8 */ diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index e70f69f791db..dc54f0321fab 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -369,11 +369,13 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc) /*------------------- PCI Driver operations and data ----------------- */ static const struct pci_device_id beiscsi_pci_id_table[] = { +#ifndef CONFIG_RHEL_DIFFERENCES { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) }, { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) }, { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) }, { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID3) }, +#endif { PCI_DEVICE(ELX_VENDOR_ID, OC_SKH_ID1) }, { 0 } }; diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index f135a10f582b..99b17b05cab1 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -82,7 +82,9 @@ MODULE_DESCRIPTION("Driver 
for HP Smart Array Controller version " \ HPSA_DRIVER_VERSION); MODULE_VERSION(HPSA_DRIVER_VERSION); MODULE_LICENSE("GPL"); +#ifndef CONFIG_RHEL_DIFFERENCES MODULE_ALIAS("cciss"); +#endif static int hpsa_simple_mode; module_param(hpsa_simple_mode, int, S_IRUGO|S_IWUSR); @@ -144,10 +146,12 @@ static const struct pci_device_id hpsa_pci_device_id[] = { {PCI_VENDOR_ID_HP_3PAR, 0x0075, 0x1590, 0x007D}, {PCI_VENDOR_ID_HP_3PAR, 0x0075, 0x1590, 0x0088}, {PCI_VENDOR_ID_HP, 0x333f, 0x103c, 0x333f}, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_HP, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, {PCI_VENDOR_ID_COMPAQ, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0}, +#endif {0,} }; diff --git a/drivers/scsi/lpfc/lpfc_ids.h b/drivers/scsi/lpfc/lpfc_ids.h index d48414e295a0..ba0e384412c9 100644 --- a/drivers/scsi/lpfc/lpfc_ids.h +++ b/drivers/scsi/lpfc/lpfc_ids.h @@ -24,6 +24,7 @@ #include const struct pci_device_id lpfc_id_table[] = { +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_VIPER, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_FIREFLY, @@ -54,14 +55,19 @@ const struct pci_device_id lpfc_id_table[] = { PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_HELIOS_DCSP, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_BMID, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_BSMB, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZEPHYR, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_HORNET, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZEPHYR_SCSP, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZEPHYR_DCSP, @@ -70,6 +76,7 @@ const struct pci_device_id lpfc_id_table[] = { PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_ZSMB, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef 
CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_TFLY, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LP101, @@ -80,6 +87,7 @@ const struct pci_device_id lpfc_id_table[] = { PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LPE11000S, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SAT, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SAT_MID, @@ -92,6 +100,7 @@ const struct pci_device_id lpfc_id_table[] = { PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_SAT_S, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PROTEUS_VF, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_PROTEUS_PF, @@ -102,18 +111,23 @@ const struct pci_device_id lpfc_id_table[] = { PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_SERVERENGINE, PCI_DEVICE_ID_TOMCAT, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_FALCON, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_BALIUS, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_FC, PCI_ANY_ID, PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_FCOE, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_FC_VF, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_FCOE_VF, PCI_ANY_ID, PCI_ANY_ID, }, +#endif {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_G6_FC, PCI_ANY_ID, PCI_ANY_ID, }, {PCI_VENDOR_ID_EMULEX, PCI_DEVICE_ID_LANCER_G7_FC, diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index ec10b2497310..ab5f0c9f3c68 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -149,6 +149,7 @@ megasas_set_ld_removed_by_fw(struct megasas_instance *instance); */ static struct pci_device_id megasas_pci_table[] = { +#ifndef 
CONFIG_RHEL_DIFFERENCES {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064R)}, /* xscale IOP */ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1078R)}, @@ -167,6 +168,7 @@ static struct pci_device_id megasas_pci_table[] = { /* xscale IOP, vega */ {PCI_DEVICE(PCI_VENDOR_ID_DELL, PCI_DEVICE_ID_DELL_PERC5)}, /* xscale IOP */ +#endif {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_FUSION)}, /* Fusion */ {PCI_DEVICE(PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_PLASMA)}, diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8e64a6f14542..f8a28b42e547 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -12480,6 +12480,7 @@ bool scsih_ncq_prio_supp(struct scsi_device *sdev) * The pci device ids are defined in mpi/mpi2_cnfg.h. */ static const struct pci_device_id mpt3sas_pci_table[] = { +#ifndef CONFIG_RHEL_DIFFERENCES /* Spitfire ~ 2004 */ { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2004, PCI_ANY_ID, PCI_ANY_ID }, @@ -12498,6 +12499,7 @@ static const struct pci_device_id mpt3sas_pci_table[] = { PCI_ANY_ID, PCI_ANY_ID }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2116_2, PCI_ANY_ID, PCI_ANY_ID }, +#endif /* Thunderbolt ~ 2208 */ { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SAS2208_1, PCI_ANY_ID, PCI_ANY_ID }, @@ -12522,9 +12524,11 @@ static const struct pci_device_id mpt3sas_pci_table[] = { PCI_ANY_ID, PCI_ANY_ID }, { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SWITCH_MPI_EP_1, PCI_ANY_ID, PCI_ANY_ID }, +#ifndef CONFIG_RHEL_DIFFERENCES /* SSS6200 */ { MPI2_MFGPAGE_VENDORID_LSI, MPI2_MFGPAGE_DEVID_SSS6200, PCI_ANY_ID, PCI_ANY_ID }, +#endif /* Fury ~ 3004 and 3008 */ { MPI2_MFGPAGE_VENDORID_LSI, MPI25_MFGPAGE_DEVID_SAS3004, PCI_ANY_ID, PCI_ANY_ID }, diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index cedd558f65eb..d2534cd5559f 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -7811,6 
+7811,7 @@ static const struct pci_error_handlers qla2xxx_err_handler = { }; static struct pci_device_id qla2xxx_pci_tbl[] = { +#ifndef CONFIG_RHEL_DIFFERENCES { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2100) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2200) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2300) }, @@ -7823,13 +7824,18 @@ static struct pci_device_id qla2xxx_pci_tbl[] = { { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8432) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP5422) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP5432) }, +#endif { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2532) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2031) }, +#ifndef CONFIG_RHEL_DIFFERENCES { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8001) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8021) }, +#endif { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8031) }, +#ifndef CONFIG_RHEL_DIFFERENCES { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISPF001) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP8044) }, +#endif { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2071) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2271) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_ISP2261) }, diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 6ee7ea4c27e0..31814931620a 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -9855,6 +9855,7 @@ static struct pci_device_id qla4xxx_pci_tbl[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, +#ifndef CONFIG_RHEL_DIFFERENCES { .vendor = PCI_VENDOR_ID_QLOGIC, .device = PCI_DEVICE_ID_QLOGIC_ISP8022, @@ -9873,6 +9874,7 @@ static struct pci_device_id qla4xxx_pci_tbl[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, }, +#endif {0, 0}, }; MODULE_DEVICE_TABLE(pci, 
qla4xxx_pci_tbl); diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 86658a81d284..5647f4756e97 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -5657,6 +5657,13 @@ static void hub_event(struct work_struct *work) (u16) hub->change_bits[0], (u16) hub->event_bits[0]); + /* Don't disconnect USB-SATA on TrimSlice */ + if (strcmp(dev_name(hdev->bus->controller), "tegra-ehci.0") == 0) { + if ((hdev->state == USB_STATE_CONFIGURED) && (hub->change_bits[0] == 0) && + (hub->event_bits[0] == 0x2)) + hub->event_bits[0] = 0; + } + /* Lock the device, then check to see if we were * disconnected while waiting for the lock to succeed. */ usb_lock_device(hdev); diff --git a/include/linux/efi.h b/include/linux/efi.h index 6b5d36babfcc..fd4a5d66a9d0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -43,6 +43,8 @@ #define EFI_ABORTED (21 | (1UL << (BITS_PER_LONG-1))) #define EFI_SECURITY_VIOLATION (26 | (1UL << (BITS_PER_LONG-1))) +#define EFI_IS_ERROR(x) ((x) & (1UL << (BITS_PER_LONG-1))) + typedef unsigned long efi_status_t; typedef u8 efi_bool_t; typedef u16 efi_char16_t; /* UNICODE character */ @@ -782,6 +784,14 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_MEM_ATTR 10 /* Did firmware publish an EFI_MEMORY_ATTRIBUTES table? */ #define EFI_MEM_NO_SOFT_RESERVE 11 /* Is the kernel configured to ignore soft reservations? */ #define EFI_PRESERVE_BS_REGIONS 12 /* Are EFI boot-services memory segments available? */ +#define EFI_SECURE_BOOT 13 /* Are we in Secure Boot mode?
*/ + +enum efi_secureboot_mode { + efi_secureboot_mode_unset, + efi_secureboot_mode_unknown, + efi_secureboot_mode_disabled, + efi_secureboot_mode_enabled, +}; #ifdef CONFIG_EFI /* @@ -793,6 +803,8 @@ static inline bool efi_enabled(int feature) } extern void efi_reboot(enum reboot_mode reboot_mode, const char *__unused); +extern void __init efi_set_secure_boot(enum efi_secureboot_mode mode); + bool __pure __efi_soft_reserve_enabled(void); static inline bool __pure efi_soft_reserve_enabled(void) @@ -813,6 +825,8 @@ static inline bool efi_enabled(int feature) static inline void efi_reboot(enum reboot_mode reboot_mode, const char *__unused) {} +static inline void efi_set_secure_boot(enum efi_secureboot_mode mode) {} + static inline bool efi_soft_reserve_enabled(void) { return false; @@ -825,6 +839,7 @@ static inline bool efi_rt_services_supported(unsigned int mask) #endif extern int efi_status_to_err(efi_status_t status); +extern const char *efi_status_to_str(efi_status_t status); /* * Variable Attributes @@ -1077,13 +1092,6 @@ static inline bool efi_runtime_disabled(void) { return true; } extern void efi_call_virt_check_flags(unsigned long flags, const char *call); extern unsigned long efi_call_virt_save_flags(void); -enum efi_secureboot_mode { - efi_secureboot_mode_unset, - efi_secureboot_mode_unknown, - efi_secureboot_mode_disabled, - efi_secureboot_mode_enabled, -}; - static inline enum efi_secureboot_mode efi_get_secureboot_mode(efi_get_variable_t *get_var) { diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1b2f0a7e00d6..bfbf97662e1e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -530,4 +530,19 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } /* OTHER_WRITABLE? Generally considered a bad idea. 
*/ \ BUILD_BUG_ON_ZERO((perms) & 2) + \ (perms)) + +struct module; + +#ifdef CONFIG_RHEL_DIFFERENCES +void mark_hardware_unsupported(const char *msg); +void mark_hardware_deprecated(const char *msg); +void mark_tech_preview(const char *msg, struct module *mod); +void mark_driver_unsupported(const char *name); +#else +static inline void mark_hardware_unsupported(const char *msg) { } +static inline void mark_hardware_deprecated(const char *msg) { } +static inline void mark_tech_preview(const char *msg, struct module *mod) { } +static inline void mark_driver_unsupported(const char *name) { } +#endif + #endif diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 2adeea44c0d5..517013ece679 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -394,6 +394,8 @@ LSM_HOOK(void, LSM_RET_VOID, bpf_prog_free_security, struct bpf_prog_aux *aux) #endif /* CONFIG_BPF_SYSCALL */ LSM_HOOK(int, 0, locked_down, enum lockdown_reason what) +LSM_HOOK(int, 0, lock_kernel_down, const char *where, enum lockdown_reason level) + #ifdef CONFIG_PERF_EVENTS LSM_HOOK(int, 0, perf_event_open, struct perf_event_attr *attr, int type) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 5c4c5c0602cb..753b53038690 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1545,6 +1545,12 @@ * * @what: kernel feature being accessed * + * @lock_kernel_down + * Put the kernel into lock-down mode. + * + * @where: Where the lock-down is originating from (e.g. 
command line option) + * @level: The lock-down level (can only increase) + * * Security hooks for perf events * * @perf_event_open: diff --git a/include/linux/module.h b/include/linux/module.h index 8a298d820dbc..b71215412e59 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -380,6 +380,7 @@ struct module { struct module_attribute *modinfo_attrs; const char *version; const char *srcversion; + const char *rhelversion; struct kobject *holders_dir; /* Exported symbols */ diff --git a/include/linux/panic.h b/include/linux/panic.h index f5844908a089..901d51012738 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -74,7 +74,24 @@ static inline void set_arch_panic_timeout(int timeout, int arch_default_timeout) #define TAINT_LIVEPATCH 15 #define TAINT_AUX 16 #define TAINT_RANDSTRUCT 17 -#define TAINT_FLAGS_COUNT 18 +/* Start of Red Hat-specific taint flags */ +#define TAINT_18 18 +#define TAINT_19 19 +#define TAINT_20 20 +#define TAINT_21 21 +#define TAINT_22 22 +#define TAINT_23 23 +#define TAINT_24 24 +#define TAINT_25 25 +#define TAINT_26 26 +#define TAINT_SUPPORT_REMOVED 27 +/* Bits 28 - 31 are reserved for Red Hat use only */ +#define TAINT_RESERVED28 28 +#define TAINT_RESERVED29 29 +#define TAINT_RESERVED30 30 +#define TAINT_UNPRIVILEGED_BPF 31 +/* End of Red Hat-specific taint flags */ +#define TAINT_FLAGS_COUNT 32 #define TAINT_FLAGS_MAX ((1UL << TAINT_FLAGS_COUNT) - 1) struct taint_flag { diff --git a/include/linux/pci.h b/include/linux/pci.h index 540b377ca8f6..eb21f6dfb846 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1433,6 +1433,10 @@ int pci_add_dynid(struct pci_driver *drv, unsigned long driver_data); const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev); +/* Reserved for Internal Red Hat use only */ +const struct pci_device_id *pci_hw_vendor_status( + const struct pci_device_id *ids, + struct pci_dev *dev); int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, 
int max, int pass); diff --git a/include/linux/rh_kabi.h b/include/linux/rh_kabi.h new file mode 100644 index 000000000000..ea9c136bf884 --- /dev/null +++ b/include/linux/rh_kabi.h @@ -0,0 +1,297 @@ +/* + * rh_kabi.h - Red Hat kABI abstraction header + * + * Copyright (c) 2014 Don Zickus + * Copyright (c) 2015-2018 Jiri Benc + * Copyright (c) 2015 Sabrina Dubroca, Hannes Frederic Sowa + * Copyright (c) 2016-2018 Prarit Bhargava + * Copyright (c) 2017 Paolo Abeni, Larry Woodman + * + * This file is released under the GPLv2. + * See the file COPYING for more details. + * + * These kabi macros hide the changes from the kabi checker and from the + * process that computes the exported symbols' checksums. + * They have 2 variants: one (defined under __GENKSYMS__) used when + * generating the checksums, and the other used when building the kernel's + * binaries. + * + * The use of these macros does not guarantee that the usage and modification + * of code is correct. As with all Red Hat only changes, an engineer must + * explain why the use of the macro is valid in the patch containing the + * changes. + * + */ + +#ifndef _LINUX_RH_KABI_H +#define _LINUX_RH_KABI_H + +#include +#include + +/* + * RH_KABI_CONST + * Adds a new const modifier to a function parameter preserving the old + * checksum. + * + * RH_KABI_DEPRECATE + * Mark the element as deprecated and make it unusable by modules while + * preserving kABI checksums. + * + * RH_KABI_DEPRECATE_FN + * Mark the function pointer as deprecated and make it unusable by modules + * while preserving kABI checksums. + * + * RH_KABI_EXTEND + * Simple macro for adding a new element to a struct. + * + * RH_KABI_EXTEND_WITH_SIZE + * Adds a new element (usually a struct) to a struct and reserves extra + * space for the new element. The provided 'size' is the total space to + * be added in longs (i.e. it's 8 * 'size' bytes), including the size of + * the added element. 
It is automatically checked that the new element + * does not overflow the reserved space, now nor in the future. However, + * no attempt is done to check the content of the added element (struct) + * for kABI conformance - kABI checking inside the added element is + * effectively switched off. + * For any struct being added by RH_KABI_EXTEND_WITH_SIZE, it is + * recommended its content to be documented as not covered by kABI + * guarantee. + * + * RH_KABI_FILL_HOLE + * Simple macro for filling a hole in a struct. + * + * Warning: only use if a hole exists for _all_ arches. Use pahole to verify. + * + * RH_KABI_RENAME + * Simple macro for renaming an element without changing its type. This + * macro can be used in bitfields, for example. + * + * NOTE: does not include the final ';' + * + * RH_KABI_REPLACE + * Simple replacement of _orig with a union of _orig and _new. + * + * The RH_KABI_REPLACE* macros attempt to add the ability to use the '_new' + * element while preserving size alignment with the '_orig' element. + * + * The #ifdef __GENKSYMS__ preserves the kABI agreement, while the anonymous + * union structure preserves the size alignment (assuming the '_new' element + * is not bigger than the '_orig' element). + * + * RH_KABI_REPLACE_UNSAFE + * Unsafe version of RH_KABI_REPLACE. Only use for typedefs. + * + * RH_KABI_FORCE_CHANGE + * Force change of the symbol checksum. The argument of the macro is a + * version for cases we need to do this more than once. + * + * This macro does the opposite: it changes the symbol checksum without + * actually changing anything about the exported symbol. It is useful for + * symbols that are not whitelisted, we're changing them in an + * incompatible way and want to prevent 3rd party modules to silently + * corrupt memory. Instead, by changing the symbol checksum, such modules + * won't be loaded by the kernel. This macro should only be used as a + * last resort when all other KABI workarounds have failed. 
+ * + * RH_KABI_EXCLUDE + * !!! WARNING: DANGEROUS, DO NOT USE unless you are aware of all the !!! + * !!! implications. This should be used ONLY EXCEPTIONALLY and only !!! + * !!! under specific circumstances. Very likely, this macro does not !!! + * !!! do what you expect it to do. Note that any usage of this macro !!! + * !!! MUST be paired with a RH_KABI_FORCE_CHANGE annotation of !!! + * !!! a suitable symbol (or an equivalent safeguard) and the commit !!! + * !!! log MUST explain why the chosen solution is appropriate. !!! + * + * Exclude the element from checksum generation. Any such element is + * considered not to be part of the kABI whitelist and may be changed at + * will. Note however that it's the responsibility of the developer + * changing the element to ensure 3rd party drivers using this element + * won't panic, for example by not allowing them to be loaded. That can + * be achieved by changing another, non-whitelisted symbol they use, + * either by nature of the change or by using RH_KABI_FORCE_CHANGE. + * + * Also note that any change to the element must preserve its size. Change + * of the size is not allowed and would constitute a silent kABI breakage. + * Beware that the RH_KABI_EXCLUDE macro does not do any size checks. + * + * NOTE + * Don't use ';' after these macros as it messes up the kABI checker by + * changing what the resulting token string looks like. Instead let this + * macro add the ';' so it can be properly hidden from the kABI checker + * (mainly for RH_KABI_EXTEND, but applied to all macros for uniformity). + * + */ +#ifdef __GENKSYMS__ + +# define RH_KABI_CONST +# define RH_KABI_EXTEND(_new) +# define RH_KABI_FILL_HOLE(_new) +# define RH_KABI_FORCE_CHANGE(ver) __attribute__((rh_kabi_change ## ver)) +# define RH_KABI_RENAME(_orig, _new) _orig + +# define _RH_KABI_DEPRECATE(_type, _orig) _type _orig +# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) 
_type (*_orig)(_args) +# define _RH_KABI_REPLACE(_orig, _new) _orig +# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _orig +# define _RH_KABI_EXCLUDE(_elem) + +#else + +# define RH_KABI_ALIGN_WARNING ". Disable CONFIG_RH_KABI_SIZE_ALIGN_CHECKS if debugging." + +# define RH_KABI_CONST const +# define RH_KABI_EXTEND(_new) _new; +# define RH_KABI_FILL_HOLE(_new) _new; +# define RH_KABI_FORCE_CHANGE(ver) +# define RH_KABI_RENAME(_orig, _new) _new + + +#if IS_BUILTIN(CONFIG_RH_KABI_SIZE_ALIGN_CHECKS) +# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) \ + union { \ + _Static_assert(sizeof(struct{_new;}) <= sizeof(struct{_orig;}), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify(_new) " is larger than " __stringify(_orig) RH_KABI_ALIGN_WARNING); \ + _Static_assert(__alignof__(struct{_new;}) <= __alignof__(struct{_orig;}), \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify(_orig) " is not aligned the same as " __stringify(_new) RH_KABI_ALIGN_WARNING); \ + } +# define __RH_KABI_CHECK_SIZE(_item, _size) \ + _Static_assert(sizeof(struct{_item;}) <= _size, \ + __FILE__ ":" __stringify(__LINE__) ": " __stringify(_item) " is larger than the reserved size (" __stringify(_size) " bytes)" RH_KABI_ALIGN_WARNING) +#else +# define __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new) +# define __RH_KABI_CHECK_SIZE(_item, _size) +#endif + +#define RH_KABI_UNIQUE_ID __PASTE(rh_kabi_hidden_, __LINE__) + +# define _RH_KABI_DEPRECATE(_type, _orig) _type rh_reserved_##_orig +# define _RH_KABI_DEPRECATE_FN(_type, _orig, _args...) 
\ + _type (* rh_reserved_##_orig)(_args) +# define _RH_KABI_REPLACE(_orig, _new) \ + union { \ + _new; \ + struct { \ + _orig; \ + } RH_KABI_UNIQUE_ID; \ + __RH_KABI_CHECK_SIZE_ALIGN(_orig, _new); \ + } +# define _RH_KABI_REPLACE_UNSAFE(_orig, _new) _new + +# define _RH_KABI_EXCLUDE(_elem) _elem + +#endif /* __GENKSYMS__ */ + +/* semicolon added wrappers for the RH_KABI_REPLACE macros */ +# define RH_KABI_DEPRECATE(_type, _orig) _RH_KABI_DEPRECATE(_type, _orig); +# define RH_KABI_DEPRECATE_FN(_type, _orig, _args...) \ + _RH_KABI_DEPRECATE_FN(_type, _orig, _args); +# define RH_KABI_REPLACE(_orig, _new) _RH_KABI_REPLACE(_orig, _new); +# define RH_KABI_REPLACE_UNSAFE(_orig, _new) _RH_KABI_REPLACE_UNSAFE(_orig, _new); +/* + * Macro for breaking up a random element into two smaller chunks using an + * anonymous struct inside an anonymous union. + */ +# define RH_KABI_REPLACE2(orig, _new1, _new2) RH_KABI_REPLACE(orig, struct{ _new1; _new2;}) + +# define RH_KABI_RESERVE(n) _RH_KABI_RESERVE(n); +/* + * Simple wrappers to replace standard Red Hat reserved elements. + */ +# define RH_KABI_USE(n, _new) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), _new) +/* + * Macros for breaking up a reserved element into two smaller chunks using + * an anonymous struct inside an anonymous union. + */ +# define RH_KABI_USE2(n, _new1, _new2) RH_KABI_REPLACE(_RH_KABI_RESERVE(n), struct{ _new1; _new2; }) + +/* + * We tried to standardize on Red Hat reserved names. These wrappers + * leverage those common names making it easier to read and find in the + * code. + */ +# define _RH_KABI_RESERVE(n) unsigned long rh_reserved##n + +#define RH_KABI_EXCLUDE(_elem) _RH_KABI_EXCLUDE(_elem); + +/* + * Extending a struct while reserving extra space. + */ +#define RH_KABI_EXTEND_WITH_SIZE(_new, _size) \ + RH_KABI_EXTEND(union { \ + _new; \ + unsigned long RH_KABI_UNIQUE_ID[_size]; \ + __RH_KABI_CHECK_SIZE(_new, 8 * (_size)); \ + }) + +/* + * RHEL macros to extend structs. 
+ * + * base struct: The struct being extended. For example, pci_dev. + * extended struct: The Red Hat struct being added to the base struct. + * For example, pci_dev_rh. + * + * These macros should be used to extend structs before KABI freeze. + * They can be used post-KABI freeze in the limited case of the base + * struct not being embedded in another struct. + * + * Extended structs cannot be shrunk in size as changes will break + * the size & offset comparison. + * + * Extended struct elements are not guaranteed for access by modules unless + * explicitly commented as such in the declaration of the extended struct or + * the element in the extended struct. + */ + +/* + * RH_KABI_SIZE_AND_EXTEND|_PTR() extends a struct by embedding or adding + * a pointer in a base struct. The name of the new struct is the name + * of the base struct appended with _rh. + */ +#define _RH_KABI_SIZE_AND_EXTEND_PTR(_struct) \ + size_t _struct##_size_rh; \ + RH_KABI_EXCLUDE(struct _struct##_rh *_struct##_rh) +#define RH_KABI_SIZE_AND_EXTEND_PTR(_struct) \ + _RH_KABI_SIZE_AND_EXTEND_PTR(_struct) + +#define _RH_KABI_SIZE_AND_EXTEND(_struct) \ + size_t _struct##_size_rh; \ + RH_KABI_EXCLUDE(struct _struct##_rh _struct##_rh) +#define RH_KABI_SIZE_AND_EXTEND(_struct) \ + _RH_KABI_SIZE_AND_EXTEND(_struct) + +/* + * RH_KABI_SET_SIZE calculates and sets the size of the extended struct and + * stores it in the size_rh field for structs that are dynamically allocated. + * This macro MUST be called when expanding a base struct with + * RH_KABI_SIZE_AND_EXTEND, and it MUST be called from the allocation site + * regardless of being allocated in the kernel or a module. + * Note: since this macro is intended to be invoked outside of a struct, + * a semicolon is necessary at the end of the line where it is invoked. 
+ */ +#define RH_KABI_SET_SIZE(_name, _struct) ({ \ + (_name)->_struct##_size_rh = sizeof(struct _struct##_rh); \ +}) + +/* + * RH_KABI_INIT_SIZE calculates and sets the size of the extended struct and + * stores it in the size_rh field for structs that are statically allocated. + * This macro MUST be called when expanding a base struct with + * RH_KABI_SIZE_AND_EXTEND, and it MUST be called from the declaration site + * regardless of being allocated in the kernel or a module. + */ +#define RH_KABI_INIT_SIZE(_struct) \ + ._struct##_size_rh = sizeof(struct _struct##_rh), + +/* + * RH_KABI_CHECK_EXT verifies allocated memory exists. This MUST be called to + * verify that memory in the _rh struct is valid, and can be called + * regardless if RH_KABI_SIZE_AND_EXTEND or RH_KABI_SIZE_AND_EXTEND_PTR is + * used. Evaluates to true when _field starts inside the recorded size. + */ +#define RH_KABI_CHECK_EXT(_ptr, _struct, _field) ({ \ + size_t __off = offsetof(struct _struct##_rh, _field); \ + (_ptr)->_struct##_size_rh > __off ? true : false; \ +}) + +#endif /* _LINUX_RH_KABI_H */ diff --git a/include/linux/rmi.h b/include/linux/rmi.h index ab7eea01ab42..fff7c5f737fc 100644 --- a/include/linux/rmi.h +++ b/include/linux/rmi.h @@ -364,6 +364,7 @@ struct rmi_driver_data { struct rmi4_attn_data attn_data; DECLARE_KFIFO(attn_fifo, struct rmi4_attn_data, 16); + struct work_struct attn_work; }; int rmi_register_transport_device(struct rmi_transport_dev *xport); diff --git a/include/linux/security.h b/include/linux/security.h index 24eda04221e9..128837ea1fb2 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -471,6 +471,7 @@ int security_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen); int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int security_lock_kernel_down(const char *where, enum lockdown_reason level); #else /* CONFIG_SECURITY */ static inline int 
call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1347,6 +1348,10 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } +static inline int security_lock_kernel_down(const char *where, enum lockdown_reason level) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) diff --git a/init/Kconfig b/init/Kconfig index 55f9f7738ebb..564553afb251 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1625,7 +1625,7 @@ config AIO this option saves about 7k. config IO_URING - bool "Enable IO uring support" if EXPERT + bool "Enable IO uring support" select IO_WQ default y help diff --git a/kernel/Makefile b/kernel/Makefile index 4df609be42d0..4ef0c0f6a8f4 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,6 +12,7 @@ obj-y = fork.o exec_domain.o panic.o \ notifier.o ksysfs.o cred.o reboot.o \ async.o range.o smpboot.o ucount.o regset.o +obj-$(CONFIG_RHEL_DIFFERENCES) += rh_taint.o obj-$(CONFIG_USERMODE_DRIVER) += usermode_driver.o obj-$(CONFIG_MODULES) += kmod.o obj-$(CONFIG_MULTIUSER) += groups.o diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e343f158e556..b2c979f9b80e 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,23 @@ static DEFINE_SPINLOCK(map_idr_lock); static DEFINE_IDR(link_idr); static DEFINE_SPINLOCK(link_idr_lock); +static int __init unprivileged_bpf_setup(char *str) +{ + unsigned long disabled; + if (!kstrtoul(str, 0, &disabled)) + sysctl_unprivileged_bpf_disabled = disabled > 2 ? 1 : disabled; /* keep the documented 0/1/2; anything else fails closed to 1 */ + + if (!sysctl_unprivileged_bpf_disabled) { + pr_warn("Unprivileged BPF has been enabled " + "(unprivileged_bpf_disabled=0 has been supplied " + "in boot parameters), tainting the kernel\n"); + add_taint(TAINT_UNPRIVILEGED_BPF, LOCKDEP_STILL_OK); + } + + return 1; +} +__setup("unprivileged_bpf_disabled=", unprivileged_bpf_setup); + int sysctl_unprivileged_bpf_disabled 
__read_mostly = IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 2 : 0; diff --git a/kernel/module.c b/kernel/module.c index ed13917ea5f3..88643913f3e4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -743,6 +743,7 @@ static struct module_attribute modinfo_##field = { \ MODINFO_ATTR(version); MODINFO_ATTR(srcversion); +MODINFO_ATTR(rhelversion); static char last_unloaded_module[MODULE_NAME_LEN+1]; @@ -1206,6 +1207,7 @@ static struct module_attribute *modinfo_attrs[] = { &module_uevent, &modinfo_version, &modinfo_srcversion, + &modinfo_rhelversion, &modinfo_initstate, &modinfo_coresize, &modinfo_initsize, diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 8723ae70ea1f..fb2d773498c2 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -38,8 +38,15 @@ int mod_verify_sig(const void *mod, struct load_info *info) modlen -= sig_len + sizeof(ms); info->len = modlen; - return verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, VERIFY_USE_SECONDARY_KEYRING, VERIFYING_MODULE_SIGNATURE, NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) { + ret = verify_pkcs7_signature(mod, modlen, mod + modlen, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + } + return ret; } diff --git a/kernel/panic.c b/kernel/panic.c index edad89660a2b..71a5a1cf6235 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -387,6 +387,20 @@ const struct taint_flag taint_flags[TAINT_FLAGS_COUNT] = { [ TAINT_LIVEPATCH ] = { 'K', ' ', true }, [ TAINT_AUX ] = { 'X', ' ', true }, [ TAINT_RANDSTRUCT ] = { 'T', ' ', true }, + [ TAINT_18 ] = { '?', '-', false }, + [ TAINT_19 ] = { '?', '-', false }, + [ TAINT_20 ] = { '?', '-', false }, + [ TAINT_21 ] = { '?', '-', false }, + [ TAINT_22 ] = { '?', '-', false }, + [ TAINT_23 ] = { '?', '-', false }, + [ TAINT_24 ] = { '?', '-', false }, + [ TAINT_25 ] = { '?', '-', false }, + [ TAINT_26 ] = { 
'?', '-', false }, + [ TAINT_SUPPORT_REMOVED ] = { 'h', ' ', false }, + [ TAINT_RESERVED28 ] = { '?', '-', false }, + [ TAINT_RESERVED29 ] = { '?', '-', false }, + [ TAINT_RESERVED30 ] = { '?', '-', false }, + [ TAINT_UNPRIVILEGED_BPF ] = { 'u', ' ', false }, }; /** diff --git a/kernel/rh_taint.c b/kernel/rh_taint.c new file mode 100644 index 000000000000..4050b6dead75 --- /dev/null +++ b/kernel/rh_taint.c @@ -0,0 +1,93 @@ +#include +#include + +/* + * The following functions are used by Red Hat to indicate to users that + * hardware and drivers are unsupported, or have limited support in RHEL major + * and minor releases. These functions output loud warning messages to the end + * user and should be USED WITH CAUTION. + * + * Any use of these functions _MUST_ be documented in the RHEL Release Notes, + * and have approval of management. + */ + +/** + * mark_hardware_unsupported() - Mark hardware, class, or type as unsupported. + * @msg: Hardware name, class, or type + * + * Called to mark a device, class of devices, or types of devices as not having + * support in any RHEL minor release. This does not TAINT the kernel. Red Hat + * will not fix bugs against this hardware in this minor release. Red Hat may + * declare support in a future major or minor update release. This cannot be + * used to mark drivers unsupported. + */ +void mark_hardware_unsupported(const char *msg) +{ + /* Print one single message */ + pr_crit("Warning: %s - this hardware has not undergone testing by Red Hat and might not be certified. Please consult https://catalog.redhat.com for certified hardware.\n", msg); +} +EXPORT_SYMBOL(mark_hardware_unsupported); + +/** + * mark_hardware_deprecated() - Mark hardware, class, or type as deprecated. + * @msg: Hardware name, class, or type + * + * Called to minimize the support status of a previously supported device in + * a minor release. This does not TAINT the kernel. 
Marking hardware + * deprecated is usually done in conjunction with the hardware vendor. Future + * RHEL major releases may not include this driver. Driver updates and fixes + * for this device will be limited to critical issues in future minor releases. + */ +void mark_hardware_deprecated(const char *msg) +{ + pr_crit("Warning: %s - this hardware is not recommended for new deployments. It continues to be supported in this RHEL release, but it is likely to be removed in the next major release. Driver updates and fixes for this device will be limited to critical issues. Please contact Red Hat Support or your device's hardware vendor for additional information.\n", msg); +} +EXPORT_SYMBOL(mark_hardware_deprecated); + +/** + * mark_tech_preview() - Mark driver or kernel subsystem as 'Tech Preview' + * @msg: Driver or kernel subsystem name; when NULL, @mod's name (or "kernel") is used + * + * Called to minimize the support status of a new driver. This does TAINT the + * kernel. Calling this function indicates that the driver or subsystem has + * had limited testing and is not marked for full support within this RHEL + * minor release. The next RHEL minor release may contain full support for + * this driver. Red Hat does not guarantee that bugs reported against this + * driver or subsystem will be resolved. + */ +void mark_tech_preview(const char *msg, struct module *mod) +{ + const char *str = NULL; + + if (msg) + str = msg; +#ifdef CONFIG_MODULES + else if (mod) /* mod->name is an embedded array, so only mod itself can be NULL */ + str = mod->name; +#endif + + pr_warn("TECH PREVIEW: %s may not be fully supported.\n" + "Please review provided documentation for limitations.\n", + (str ? str : "kernel")); + add_taint(TAINT_AUX, LOCKDEP_STILL_OK); +#ifdef CONFIG_MODULES + if (mod) + mod->taints |= (1U << TAINT_AUX); +#endif +} +EXPORT_SYMBOL(mark_tech_preview); + +/** + * mark_driver_unsupported - drivers that we know we don't want to support + * @name: the name of the driver + * + * In some cases Red Hat has chosen to build a driver for internal QE + * use. 
Use this function to mark those drivers as unsupported for + * customers. + */ +void mark_driver_unsupported(const char *name) +{ + pr_crit("Warning: %s - This driver has not undergone sufficient testing by Red Hat for this release and therefore cannot be used in production systems.\n", + name ? name : "kernel"); +} +EXPORT_SYMBOL(mark_driver_unsupported); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 272f4a272f8c..3a5a6455b363 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -243,6 +243,11 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write, if (write && !ret) { if (locked_state && unpriv_enable != 1) return -EPERM; + if (!unpriv_enable) { + pr_warn("Unprivileged BPF has been enabled, " + "tainting the kernel\n"); + add_taint(TAINT_UNPRIVILEGED_BPF, LOCKDEP_STILL_OK); + } *(int *)table->data = unpriv_enable; } return ret; diff --git a/mm/cma.c b/mm/cma.c index 995e15480937..588f7e7885cf 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -125,6 +125,12 @@ static void __init cma_activate_area(struct cma *cma) spin_lock_init(&cma->mem_head_lock); #endif +#ifdef CONFIG_RHEL_DIFFERENCES + /* s390x and ppc64 have been using CMA already in RHEL 8 as default. 
*/ + if (!IS_ENABLED(CONFIG_S390) && !IS_ENABLED(CONFIG_PPC64)) + mark_tech_preview("CMA", NULL); +#endif /* CONFIG_RHEL_DIFFERENCES */ + return; not_in_zone: @@ -437,6 +443,10 @@ struct page *cma_alloc(struct cma *cma, unsigned long count, if (!cma || !cma->count || !cma->bitmap) goto out; +#ifdef CONFIG_RHEL_DIFFERENCES + pr_info_once("Initial CMA usage detected\n"); +#endif /* CONFIG_RHEL_DIFFERENCES */ + pr_debug("%s(cma %p, count %lu, align %d)\n", __func__, (void *)cma, count, align); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 270a7df898e2..ac41f848b8a2 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -20,6 +20,7 @@ #include #include "modpost.h" #include "../../include/linux/license.h" +#include "../../include/generated/uapi/linux/version.h" /* Are we using CONFIG_MODVERSIONS? */ static int modversions = 0; @@ -2342,6 +2343,12 @@ static void write_buf(struct buffer *b, const char *fname) } } +static void add_rhelversion(struct buffer *b, struct module *mod) +{ + buf_printf(b, "MODULE_INFO(rhelversion, \"%d.%d\");\n", RHEL_MAJOR, + RHEL_MINOR); +} + static void write_if_changed(struct buffer *b, const char *fname) { char *tmp; @@ -2571,6 +2578,7 @@ int main(int argc, char **argv) add_depends(&buf, mod); add_moddevtable(&buf, mod); add_srcversion(&buf, mod); + add_rhelversion(&buf, mod); sprintf(fname, "%s.mod.c", mod->name); write_if_changed(&buf, fname); diff --git a/scripts/tags.sh b/scripts/tags.sh index db8ba411860a..2294fb0f17a9 100755 --- a/scripts/tags.sh +++ b/scripts/tags.sh @@ -16,6 +16,8 @@ fi ignore="$(echo "$RCS_FIND_IGNORE" | sed 's|\\||g' )" # tags and cscope files should also ignore MODVERSION *.mod.c files ignore="$ignore ( -name *.mod.c ) -prune -o" +# RHEL tags and cscope should also ignore redhat/rpm +ignore="$ignore ( -path redhat/rpm ) -prune -o" # Use make KBUILD_ABS_SRCTREE=1 {tags|cscope} # to force full paths for a non-O= build diff --git a/security/integrity/platform_certs/load_uefi.c 
b/security/integrity/platform_certs/load_uefi.c index f290f78c3f30..d3e7ae04f5be 100644 --- a/security/integrity/platform_certs/load_uefi.c +++ b/security/integrity/platform_certs/load_uefi.c @@ -46,7 +46,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, return NULL; if (*status != EFI_BUFFER_TOO_SMALL) { - pr_err("Couldn't get size: 0x%lx\n", *status); + pr_err("Couldn't get size: %s (0x%lx)\n", + efi_status_to_str(*status), *status); return NULL; } @@ -57,7 +58,8 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid, *status = efi.get_variable(name, guid, NULL, &lsize, db); if (*status != EFI_SUCCESS) { kfree(db); - pr_err("Error reading db var: 0x%lx\n", *status); + pr_err("Error reading db var: %s (0x%lx)\n", + efi_status_to_str(*status), *status); return NULL; } diff --git a/security/lockdown/Kconfig b/security/lockdown/Kconfig index e84ddf484010..d0501353a4b9 100644 --- a/security/lockdown/Kconfig +++ b/security/lockdown/Kconfig @@ -16,6 +16,19 @@ config SECURITY_LOCKDOWN_LSM_EARLY subsystem is fully initialised. If enabled, lockdown will unconditionally be called before any other LSMs. +config LOCK_DOWN_IN_EFI_SECURE_BOOT + bool "Lock down the kernel in EFI Secure Boot mode" + default n + depends on EFI && SECURITY_LOCKDOWN_LSM_EARLY + help + UEFI Secure Boot provides a mechanism for ensuring that the firmware + will only load signed bootloaders and kernels. Secure boot mode may + be determined from EFI variables provided by the system firmware if + not indicated by the boot parameters. + + Enabling this option results in kernel lockdown being triggered if + EFI Secure Boot is set. 
+ choice prompt "Kernel default lockdown mode" default LOCK_DOWN_KERNEL_FORCE_NONE diff --git a/security/lockdown/lockdown.c b/security/lockdown/lockdown.c index 87cbdc64d272..18555cf18da7 100644 --- a/security/lockdown/lockdown.c +++ b/security/lockdown/lockdown.c @@ -73,6 +73,7 @@ static int lockdown_is_locked_down(enum lockdown_reason what) static struct security_hook_list lockdown_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(locked_down, lockdown_is_locked_down), + LSM_HOOK_INIT(lock_kernel_down, lock_kernel_down), }; static int __init lockdown_lsm_init(void) diff --git a/security/security.c b/security/security.c index 09533cbb7221..d36675494753 100644 --- a/security/security.c +++ b/security/security.c @@ -2598,6 +2598,12 @@ int security_locked_down(enum lockdown_reason what) } EXPORT_SYMBOL(security_locked_down); +int security_lock_kernel_down(const char *where, enum lockdown_reason level) +{ + return call_int_hook(lock_kernel_down, 0, where, level); +} +EXPORT_SYMBOL(security_lock_kernel_down); + #ifdef CONFIG_PERF_EVENTS int security_perf_event_open(struct perf_event_attr *attr, int type) { diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f405b20c1e6c..a8e298a22709 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -424,7 +424,6 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ $(TRUNNER_BPF_LSKELS) \ - $(TRUNNER_BPF_SKELS_LINKED) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) $(Q)cd $$(@D) && $$(CC) -I. 
$$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c deleted file mode 100644 index e9916f2817ec..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_funcs.skel.h" - -void test_linked_funcs(void) -{ - int err; - struct linked_funcs *skel; - - skel = linked_funcs__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - skel->rodata->my_tid = syscall(SYS_gettid); - skel->bss->syscall_id = SYS_getpgid; - - err = linked_funcs__load(skel); - if (!ASSERT_OK(err, "skel_load")) - goto cleanup; - - err = linked_funcs__attach(skel); - if (!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1"); - ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1"); - ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1"); - - ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2"); - ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2"); - /* output_weak2 should never be updated */ - ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2"); - -cleanup: - linked_funcs__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c deleted file mode 100644 index 85dcaaaf2775..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_maps.c +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_maps.skel.h" - -void test_linked_maps(void) -{ - int err; - struct linked_maps *skel; - - skel = linked_maps__open_and_load(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - err = linked_maps__attach(skel); - if 
(!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1"); - ASSERT_EQ(skel->bss->output_second1, 2, "output_second1"); - ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1"); - -cleanup: - linked_maps__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c deleted file mode 100644 index 267166abe4c1..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/linked_vars.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include -#include -#include "linked_vars.skel.h" - -void test_linked_vars(void) -{ - int err; - struct linked_vars *skel; - - skel = linked_vars__open(); - if (!ASSERT_OK_PTR(skel, "skel_open")) - return; - - skel->bss->input_bss1 = 1000; - skel->bss->input_bss2 = 2000; - skel->bss->input_bss_weak = 3000; - - err = linked_vars__load(skel); - if (!ASSERT_OK(err, "skel_load")) - goto cleanup; - - err = linked_vars__attach(skel); - if (!ASSERT_OK(err, "skel_attach")) - goto cleanup; - - /* trigger */ - syscall(SYS_getpgid); - - ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1"); - ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2"); - /* 10 comes from "winner" input_data_weak in first obj file */ - ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1"); - ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2"); - /* 100 comes from "winner" input_rodata_weak in first obj file */ - ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1"); - ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2"); - -cleanup: - linked_vars__destroy(skel); -} diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c deleted file mode 100644 index f62df4d023f9..000000000000 --- 
a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ /dev/null @@ -1,545 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -/* WARNING: This implemenation is not necessarily the same - * as the tcp_cubic.c. The purpose is mainly for testing - * the kernel BPF logic. - * - * Highlights: - * 1. CONFIG_HZ .kconfig map is used. - * 2. In bictcp_update(), calculation is changed to use usec - * resolution (i.e. USEC_PER_JIFFY) instead of using jiffies. - * Thus, usecs_to_jiffies() is not used in the bpf_cubic.c. - * 3. In bitctcp_update() [under tcp_friendliness], the original - * "while (ca->ack_cnt > delta)" loop is changed to the equivalent - * "ca->ack_cnt / delta" operation. - */ - -#include -#include -#include -#include "bpf_tcp_helpers.h" - -char _license[] SEC("license") = "GPL"; - -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) - -#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation - * max_cwnd = snd_cwnd * beta - */ -#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */ - -/* Two methods of hybrid slow start */ -#define HYSTART_ACK_TRAIN 0x1 -#define HYSTART_DELAY 0x2 - -/* Number of delay samples for detecting the increase of delay */ -#define HYSTART_MIN_SAMPLES 8 -#define HYSTART_DELAY_MIN (4000U) /* 4ms */ -#define HYSTART_DELAY_MAX (16000U) /* 16 ms */ -#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) - -static int fast_convergence = 1; -static const int beta = 717; /* = 717/1024 (BICTCP_BETA_SCALE) */ -static int initial_ssthresh; -static const int bic_scale = 41; -static int tcp_friendliness = 1; - -static int hystart = 1; -static int hystart_detect = HYSTART_ACK_TRAIN | HYSTART_DELAY; -static int hystart_low_window = 16; -static int hystart_ack_delta_us = 2000; - -static const __u32 cube_rtt_scale = (bic_scale * 10); /* 1024*c/rtt */ -static const __u32 beta_scale = 8*(BICTCP_BETA_SCALE+beta) / 3 - / (BICTCP_BETA_SCALE - beta); -/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 - * so K = cubic_root( 
(wmax-cwnd)*rtt/c ) - * the unit of K is bictcp_HZ=2^10, not HZ - * - * c = bic_scale >> 10 - * rtt = 100ms - * - * the following code has been designed and tested for - * cwnd < 1 million packets - * RTT < 100 seconds - * HZ < 1,000,00 (corresponding to 10 nano-second) - */ - -/* 1/c * 2^2*bictcp_HZ * srtt, 2^40 */ -static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ)) - / (bic_scale * 10); - -/* BIC TCP Parameters */ -struct bictcp { - __u32 cnt; /* increase cwnd by 1 after ACKs */ - __u32 last_max_cwnd; /* last maximum snd_cwnd */ - __u32 last_cwnd; /* the last snd_cwnd */ - __u32 last_time; /* time when updated last_cwnd */ - __u32 bic_origin_point;/* origin point of bic function */ - __u32 bic_K; /* time to origin point - from the beginning of the current epoch */ - __u32 delay_min; /* min delay (usec) */ - __u32 epoch_start; /* beginning of an epoch */ - __u32 ack_cnt; /* number of acks */ - __u32 tcp_cwnd; /* estimated tcp cwnd */ - __u16 unused; - __u8 sample_cnt; /* number of samples to decide curr_rtt */ - __u8 found; /* the exit point is found? 
*/ - __u32 round_start; /* beginning of each round */ - __u32 end_seq; /* end_seq of the round */ - __u32 last_ack; /* last time when the ACK spacing is close */ - __u32 curr_rtt; /* the minimum rtt of current round */ -}; - -static inline void bictcp_reset(struct bictcp *ca) -{ - ca->cnt = 0; - ca->last_max_cwnd = 0; - ca->last_cwnd = 0; - ca->last_time = 0; - ca->bic_origin_point = 0; - ca->bic_K = 0; - ca->delay_min = 0; - ca->epoch_start = 0; - ca->ack_cnt = 0; - ca->tcp_cwnd = 0; - ca->found = 0; -} - -extern unsigned long CONFIG_HZ __kconfig; -#define HZ CONFIG_HZ -#define USEC_PER_MSEC 1000UL -#define USEC_PER_SEC 1000000UL -#define USEC_PER_JIFFY (USEC_PER_SEC / HZ) - -static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor) -{ - return dividend / divisor; -} - -#define div64_ul div64_u64 - -#define BITS_PER_U64 (sizeof(__u64) * 8) -static __always_inline int fls64(__u64 x) -{ - int num = BITS_PER_U64 - 1; - - if (x == 0) - return 0; - - if (!(x & (~0ull << (BITS_PER_U64-32)))) { - num -= 32; - x <<= 32; - } - if (!(x & (~0ull << (BITS_PER_U64-16)))) { - num -= 16; - x <<= 16; - } - if (!(x & (~0ull << (BITS_PER_U64-8)))) { - num -= 8; - x <<= 8; - } - if (!(x & (~0ull << (BITS_PER_U64-4)))) { - num -= 4; - x <<= 4; - } - if (!(x & (~0ull << (BITS_PER_U64-2)))) { - num -= 2; - x <<= 2; - } - if (!(x & (~0ull << (BITS_PER_U64-1)))) - num -= 1; - - return num + 1; -} - -static __always_inline __u32 bictcp_clock_us(const struct sock *sk) -{ - return tcp_sk(sk)->tcp_mstamp; -} - -static __always_inline void bictcp_hystart_reset(struct sock *sk) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - ca->round_start = ca->last_ack = bictcp_clock_us(sk); - ca->end_seq = tp->snd_nxt; - ca->curr_rtt = ~0U; - ca->sample_cnt = 0; -} - -/* "struct_ops/" prefix is not a requirement - * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS - * as long as it is used in one of the func ptr - * under SEC(".struct_ops"). 
- */ -SEC("struct_ops/bpf_cubic_init") -void BPF_PROG(bpf_cubic_init, struct sock *sk) -{ - struct bictcp *ca = inet_csk_ca(sk); - - bictcp_reset(ca); - - if (hystart) - bictcp_hystart_reset(sk); - - if (!hystart && initial_ssthresh) - tcp_sk(sk)->snd_ssthresh = initial_ssthresh; -} - -/* No prefix in SEC will also work. - * The remaining tcp-cubic functions have an easier way. - */ -SEC("no-sec-prefix-bictcp_cwnd_event") -void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event) -{ - if (event == CA_EVENT_TX_START) { - struct bictcp *ca = inet_csk_ca(sk); - __u32 now = tcp_jiffies32; - __s32 delta; - - delta = now - tcp_sk(sk)->lsndtime; - - /* We were application limited (idle) for a while. - * Shift epoch_start to keep cwnd growth to cubic curve. - */ - if (ca->epoch_start && delta > 0) { - ca->epoch_start += delta; - if (after(ca->epoch_start, now)) - ca->epoch_start = now; - } - return; - } -} - -/* - * cbrt(x) MSB values for x MSB values in [0..63]. - * Precomputed then refined by hand - Willy Tarreau - * - * For x in [0..63], - * v = cbrt(x << 18) - 1 - * cbrt(x) = (v[x] + 10) >> 6 - */ -static const __u8 v[] = { - /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, - /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, - /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, - /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, - /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, - /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, - /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, - /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, -}; - -/* calculate the cubic root of x using a table lookup followed by one - * Newton-Raphson iteration. 
- * Avg err ~= 0.195% - */ -static __always_inline __u32 cubic_root(__u64 a) -{ - __u32 x, b, shift; - - if (a < 64) { - /* a in [0..63] */ - return ((__u32)v[(__u32)a] + 35) >> 6; - } - - b = fls64(a); - b = ((b * 84) >> 8) - 1; - shift = (a >> (b * 3)); - - /* it is needed for verifier's bound check on v */ - if (shift >= 64) - return 0; - - x = ((__u32)(((__u32)v[shift] + 10) << b)) >> 6; - - /* - * Newton-Raphson iteration - * 2 - * x = ( 2 * x + a / x ) / 3 - * k+1 k k - */ - x = (2 * x + (__u32)div64_u64(a, (__u64)x * (__u64)(x - 1))); - x = ((x * 341) >> 10); - return x; -} - -/* - * Compute congestion window to use. - */ -static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd, - __u32 acked) -{ - __u32 delta, bic_target, max_cnt; - __u64 offs, t; - - ca->ack_cnt += acked; /* count the number of ACKed packets */ - - if (ca->last_cwnd == cwnd && - (__s32)(tcp_jiffies32 - ca->last_time) <= HZ / 32) - return; - - /* The CUBIC function can update ca->cnt at most once per jiffy. - * On all cwnd reduction events, ca->epoch_start is set to 0, - * which will force a recalculation of ca->cnt. 
- */ - if (ca->epoch_start && tcp_jiffies32 == ca->last_time) - goto tcp_friendliness; - - ca->last_cwnd = cwnd; - ca->last_time = tcp_jiffies32; - - if (ca->epoch_start == 0) { - ca->epoch_start = tcp_jiffies32; /* record beginning */ - ca->ack_cnt = acked; /* start counting */ - ca->tcp_cwnd = cwnd; /* syn with cubic */ - - if (ca->last_max_cwnd <= cwnd) { - ca->bic_K = 0; - ca->bic_origin_point = cwnd; - } else { - /* Compute new K based on - * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) - */ - ca->bic_K = cubic_root(cube_factor - * (ca->last_max_cwnd - cwnd)); - ca->bic_origin_point = ca->last_max_cwnd; - } - } - - /* cubic function - calc*/ - /* calculate c * time^3 / rtt, - * while considering overflow in calculation of time^3 - * (so time^3 is done by using 64 bit) - * and without the support of division of 64bit numbers - * (so all divisions are done by using 32 bit) - * also NOTE the unit of those veriables - * time = (t - K) / 2^bictcp_HZ - * c = bic_scale >> 10 - * rtt = (srtt >> 3) / HZ - * !!! The following code does not have overflow problems, - * if the cwnd < 1 million packets !!! - */ - - t = (__s32)(tcp_jiffies32 - ca->epoch_start) * USEC_PER_JIFFY; - t += ca->delay_min; - /* change the unit from usec to bictcp_HZ */ - t <<= BICTCP_HZ; - t /= USEC_PER_SEC; - - if (t < ca->bic_K) /* t - K */ - offs = ca->bic_K - t; - else - offs = t - ca->bic_K; - - /* c/rtt * (t-K)^3 */ - delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); - if (t < ca->bic_K) /* below origin*/ - bic_target = ca->bic_origin_point - delta; - else /* above origin*/ - bic_target = ca->bic_origin_point + delta; - - /* cubic function - calc bictcp_cnt*/ - if (bic_target > cwnd) { - ca->cnt = cwnd / (bic_target - cwnd); - } else { - ca->cnt = 100 * cwnd; /* very small increment*/ - } - - /* - * The initial growth of cubic function may be too conservative - * when the available bandwidth is still unknown. 
- */ - if (ca->last_max_cwnd == 0 && ca->cnt > 20) - ca->cnt = 20; /* increase cwnd 5% per RTT */ - -tcp_friendliness: - /* TCP Friendly */ - if (tcp_friendliness) { - __u32 scale = beta_scale; - __u32 n; - - /* update tcp cwnd */ - delta = (cwnd * scale) >> 3; - if (ca->ack_cnt > delta && delta) { - n = ca->ack_cnt / delta; - ca->ack_cnt -= n * delta; - ca->tcp_cwnd += n; - } - - if (ca->tcp_cwnd > cwnd) { /* if bic is slower than tcp */ - delta = ca->tcp_cwnd - cwnd; - max_cnt = cwnd / delta; - if (ca->cnt > max_cnt) - ca->cnt = max_cnt; - } - } - - /* The maximum rate of cwnd increase CUBIC allows is 1 packet per - * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. - */ - ca->cnt = max(ca->cnt, 2U); -} - -/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */ -void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - if (!tcp_is_cwnd_limited(sk)) - return; - - if (tcp_in_slow_start(tp)) { - if (hystart && after(ack, ca->end_seq)) - bictcp_hystart_reset(sk); - acked = tcp_slow_start(tp, acked); - if (!acked) - return; - } - bictcp_update(ca, tp->snd_cwnd, acked); - tcp_cong_avoid_ai(tp, ca->cnt, acked); -} - -__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - - ca->epoch_start = 0; /* end of epoch */ - - /* Wmax and fast convergence */ - if (tp->snd_cwnd < ca->last_max_cwnd && fast_convergence) - ca->last_max_cwnd = (tp->snd_cwnd * (BICTCP_BETA_SCALE + beta)) - / (2 * BICTCP_BETA_SCALE); - else - ca->last_max_cwnd = tp->snd_cwnd; - - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); -} - -void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state) -{ - if (new_state == TCP_CA_Loss) { - bictcp_reset(inet_csk_ca(sk)); - bictcp_hystart_reset(sk); - } -} - -#define GSO_MAX_SIZE 65536 - -/* Account for TSO/GRO delays. 
- * Otherwise short RTT flows could get too small ssthresh, since during - * slow start we begin with small TSO packets and ca->delay_min would - * not account for long aggregation delay when TSO packets get bigger. - * Ideally even with a very small RTT we would like to have at least one - * TSO packet being sent and received by GRO, and another one in qdisc layer. - * We apply another 100% factor because @rate is doubled at this point. - * We cap the cushion to 1ms. - */ -static __always_inline __u32 hystart_ack_delay(struct sock *sk) -{ - unsigned long rate; - - rate = sk->sk_pacing_rate; - if (!rate) - return 0; - return min((__u64)USEC_PER_MSEC, - div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate)); -} - -static __always_inline void hystart_update(struct sock *sk, __u32 delay) -{ - struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - __u32 threshold; - - if (hystart_detect & HYSTART_ACK_TRAIN) { - __u32 now = bictcp_clock_us(sk); - - /* first detection parameter - ack-train detection */ - if ((__s32)(now - ca->last_ack) <= hystart_ack_delta_us) { - ca->last_ack = now; - - threshold = ca->delay_min + hystart_ack_delay(sk); - - /* Hystart ack train triggers if we get ack past - * ca->delay_min/2. - * Pacing might have delayed packets up to RTT/2 - * during slow start. 
- */ - if (sk->sk_pacing_status == SK_PACING_NONE) - threshold >>= 1; - - if ((__s32)(now - ca->round_start) > threshold) { - ca->found = 1; - tp->snd_ssthresh = tp->snd_cwnd; - } - } - } - - if (hystart_detect & HYSTART_DELAY) { - /* obtain the minimum delay of more than sampling packets */ - if (ca->curr_rtt > delay) - ca->curr_rtt = delay; - if (ca->sample_cnt < HYSTART_MIN_SAMPLES) { - ca->sample_cnt++; - } else { - if (ca->curr_rtt > ca->delay_min + - HYSTART_DELAY_THRESH(ca->delay_min >> 3)) { - ca->found = 1; - tp->snd_ssthresh = tp->snd_cwnd; - } - } - } -} - -void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, - const struct ack_sample *sample) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct bictcp *ca = inet_csk_ca(sk); - __u32 delay; - - /* Some calls are for duplicates without timetamps */ - if (sample->rtt_us < 0) - return; - - /* Discard delay samples right after fast recovery */ - if (ca->epoch_start && (__s32)(tcp_jiffies32 - ca->epoch_start) < HZ) - return; - - delay = sample->rtt_us; - if (delay == 0) - delay = 1; - - /* first time call or link delay decreases */ - if (ca->delay_min == 0 || ca->delay_min > delay) - ca->delay_min = delay; - - /* hystart triggers when cwnd is larger than some threshold */ - if (!ca->found && tcp_in_slow_start(tp) && hystart && - tp->snd_cwnd >= hystart_low_window) - hystart_update(sk, delay); -} - -extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym; - -__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk) -{ - return tcp_reno_undo_cwnd(sk); -} - -SEC(".struct_ops") -struct tcp_congestion_ops cubic = { - .init = (void *)bpf_cubic_init, - .ssthresh = (void *)bpf_cubic_recalc_ssthresh, - .cong_avoid = (void *)bpf_cubic_cong_avoid, - .set_state = (void *)bpf_cubic_state, - .undo_cwnd = (void *)bpf_cubic_undo_cwnd, - .cwnd_event = (void *)bpf_cubic_cwnd_event, - .pkts_acked = (void *)bpf_cubic_acked, - .name = "bpf_cubic", -}; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c 
b/tools/testing/selftests/bpf/progs/bpf_dctcp.c deleted file mode 100644 index fd42247da8b4..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ /dev/null @@ -1,224 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -/* WARNING: This implemenation is not necessarily the same - * as the tcp_dctcp.c. The purpose is mainly for testing - * the kernel BPF logic. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "bpf_tcp_helpers.h" - -char _license[] SEC("license") = "GPL"; - -int stg_result = 0; - -struct { - __uint(type, BPF_MAP_TYPE_SK_STORAGE); - __uint(map_flags, BPF_F_NO_PREALLOC); - __type(key, int); - __type(value, int); -} sk_stg_map SEC(".maps"); - -#define DCTCP_MAX_ALPHA 1024U - -struct dctcp { - __u32 old_delivered; - __u32 old_delivered_ce; - __u32 prior_rcv_nxt; - __u32 dctcp_alpha; - __u32 next_seq; - __u32 ce_state; - __u32 loss_cwnd; -}; - -static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */ -static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA; - -static __always_inline void dctcp_reset(const struct tcp_sock *tp, - struct dctcp *ca) -{ - ca->next_seq = tp->snd_nxt; - - ca->old_delivered = tp->delivered; - ca->old_delivered_ce = tp->delivered_ce; -} - -SEC("struct_ops/dctcp_init") -void BPF_PROG(dctcp_init, struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); - int *stg; - - ca->prior_rcv_nxt = tp->rcv_nxt; - ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); - ca->loss_cwnd = 0; - ca->ce_state = 0; - - stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0); - if (stg) { - stg_result = *stg; - bpf_sk_storage_delete(&sk_stg_map, (void *)tp); - } - dctcp_reset(tp, ca); -} - -SEC("struct_ops/dctcp_ssthresh") -__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk) -{ - struct dctcp *ca = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); - - ca->loss_cwnd = tp->snd_cwnd; - return max(tp->snd_cwnd - 
((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U); -} - -SEC("struct_ops/dctcp_update_alpha") -void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct dctcp *ca = inet_csk_ca(sk); - - /* Expired RTT */ - if (!before(tp->snd_una, ca->next_seq)) { - __u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; - __u32 alpha = ca->dctcp_alpha; - - /* alpha = (1 - g) * alpha + g * F */ - - alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (delivered_ce) { - __u32 delivered = tp->delivered - ca->old_delivered; - - /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 M packets. - */ - delivered_ce <<= (10 - dctcp_shift_g); - delivered_ce /= max(1U, delivered); - - alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); - } - ca->dctcp_alpha = alpha; - dctcp_reset(tp, ca); - } -} - -static __always_inline void dctcp_react_to_loss(struct sock *sk) -{ - struct dctcp *ca = inet_csk_ca(sk); - struct tcp_sock *tp = tcp_sk(sk); - - ca->loss_cwnd = tp->snd_cwnd; - tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); -} - -SEC("struct_ops/dctcp_state") -void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state) -{ - if (new_state == TCP_CA_Recovery && - new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state)) - dctcp_react_to_loss(sk); - /* We handle RTO in dctcp_cwnd_event to ensure that we perform only - * one loss-adjustment per RTT. - */ -} - -static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (ce_state == 1) - tp->ecn_flags |= TCP_ECN_DEMAND_CWR; - else - tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; -} - -/* Minimal DCTP CE state machine: - * - * S: 0 <- last pkt was non-CE - * 1 <- last pkt was CE - */ -static __always_inline -void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt, - __u32 *prior_rcv_nxt, __u32 *ce_state) -{ - __u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 
1 : 0; - - if (*ce_state != new_ce_state) { - /* CE state has changed, force an immediate ACK to - * reflect the new CE state. If an ACK was delayed, - * send that first to reflect the prior CE state. - */ - if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER) { - dctcp_ece_ack_cwr(sk, *ce_state); - bpf_tcp_send_ack(sk, *prior_rcv_nxt); - } - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW; - } - *prior_rcv_nxt = tcp_sk(sk)->rcv_nxt; - *ce_state = new_ce_state; - dctcp_ece_ack_cwr(sk, new_ce_state); -} - -SEC("struct_ops/dctcp_cwnd_event") -void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev) -{ - struct dctcp *ca = inet_csk_ca(sk); - - switch (ev) { - case CA_EVENT_ECN_IS_CE: - case CA_EVENT_ECN_NO_CE: - dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); - break; - case CA_EVENT_LOSS: - dctcp_react_to_loss(sk); - break; - default: - /* Don't care for the rest. */ - break; - } -} - -SEC("struct_ops/dctcp_cwnd_undo") -__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk) -{ - const struct dctcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - -extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym; - -SEC("struct_ops/dctcp_reno_cong_avoid") -void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked) -{ - tcp_reno_cong_avoid(sk, ack, acked); -} - -SEC(".struct_ops") -struct tcp_congestion_ops dctcp_nouse = { - .init = (void *)dctcp_init, - .set_state = (void *)dctcp_state, - .flags = TCP_CONG_NEEDS_ECN, - .name = "bpf_dctcp_nouse", -}; - -SEC(".struct_ops") -struct tcp_congestion_ops dctcp = { - .init = (void *)dctcp_init, - .in_ack_event = (void *)dctcp_update_alpha, - .cwnd_event = (void *)dctcp_cwnd_event, - .ssthresh = (void *)dctcp_ssthresh, - .cong_avoid = (void *)dctcp_cong_avoid, - .undo_cwnd = (void *)dctcp_cwnd_undo, - .set_state = (void *)dctcp_state, - .flags = TCP_CONG_NEEDS_ECN, - .name = "bpf_dctcp", -}; diff --git 
a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c deleted file mode 100644 index 470f8723e463..000000000000 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ /dev/null @@ -1,47 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ -#include -#include -#include "bpf_tcp_helpers.h" - -extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; - -SEC("classifier") -int kfunc_call_test2(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - - if (!sk) - return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - return bpf_kfunc_call_test2((struct sock *)sk, 1, 2); -} - -SEC("classifier") -int kfunc_call_test1(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - __u64 a = 1ULL << 32; - __u32 ret; - - if (!sk) - return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4); - ret = a >> 32; /* ret should be 2 */ - ret += (__u32)a; /* ret should be 12 */ - - return ret; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c deleted file mode 100644 index b2dcb7d9cb03..000000000000 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ -#include -#include -#include "bpf_tcp_helpers.h" - -extern const int bpf_prog_active __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; -int active_res = -1; -int sk_state = -1; - -int __noinline f1(struct __sk_buff *skb) -{ - struct bpf_sock *sk = skb->sk; - int *active; - - if (!sk) - 
return -1; - - sk = bpf_sk_fullsock(sk); - if (!sk) - return -1; - - active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, - bpf_get_smp_processor_id()); - if (active) - active_res = *active; - - sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; - - return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); -} - -SEC("classifier") -int kfunc_call_test1(struct __sk_buff *skb) -{ - return f1(skb); -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c deleted file mode 100644 index b964ec1390c2..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* weak and shared between two files */ -const volatile int my_tid __weak; -long syscall_id __weak; - -int output_val1; -int output_ctx1; -int output_weak1; - -/* same "subprog" name in all files, but it's ok because they all are static */ -static __noinline int subprog(int x) -{ - /* but different formula */ - return x * 1; -} - -/* Global functions can't be void */ -int set_output_val1(int x) -{ - output_val1 = x + subprog(x); - return x; -} - -/* This function can't be verified as global, as it assumes raw_tp/sys_enter - * context and accesses syscall id (second argument). So we mark it as - * __hidden, so that libbpf will mark it as static in the final object file, - * right before verifying it in the kernel. - * - * But we don't mark it as __hidden here, rather at extern site. __hidden is - * "contaminating" visibility, so it will get propagated from either extern or - * actual definition (including from the losing __weak definition). 
- */ -void set_output_ctx1(__u64 *ctx) -{ - output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */ -} - -/* this weak instance should win because it's the first one */ -__weak int set_output_weak(int x) -{ - output_weak1 = x; - return x; -} - -extern int set_output_val2(int x); - -/* here we'll force set_output_ctx2() to be __hidden in the final obj file */ -__hidden extern void set_output_ctx2(__u64 *ctx); - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler1, struct pt_regs *regs, long id) -{ - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) - return 0; - - set_output_val2(1000); - set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */ - - /* keep input value the same across both files to avoid dependency on - * handler call order; differentiate by output_weak1 vs output_weak2. - */ - set_output_weak(42); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c deleted file mode 100644 index 575e958e60b7..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* weak and shared between both files */ -const volatile int my_tid __weak; -long syscall_id __weak; - -int output_val2; -int output_ctx2; -int output_weak2; /* should stay zero */ - -/* same "subprog" name in all files, but it's ok because they all are static */ -static __noinline int subprog(int x) -{ - /* but different formula */ - return x * 2; -} - -/* Global functions can't be void */ -int set_output_val2(int x) -{ - output_val2 = 2 * x + 2 * subprog(x); - return 2 * x; -} - -/* This function can't be verified as global, as it assumes raw_tp/sys_enter - * context and accesses syscall id (second argument). 
So we mark it as - * __hidden, so that libbpf will mark it as static in the final object file, - * right before verifying it in the kernel. - * - * But we don't mark it as __hidden here, rather at extern site. __hidden is - * "contaminating" visibility, so it will get propagated from either extern or - * actual definition (including from the losing __weak definition). - */ -void set_output_ctx2(__u64 *ctx) -{ - output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */ -} - -/* this weak instance should lose, because it will be processed second */ -__weak int set_output_weak(int x) -{ - output_weak2 = x; - return 2 * x; -} - -extern int set_output_val1(int x); - -/* here we'll force set_output_ctx1() to be __hidden in the final obj file */ -__hidden extern void set_output_ctx1(__u64 *ctx); - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler2, struct pt_regs *regs, long id) -{ - if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id) - return 0; - - set_output_val1(2000); - set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */ - - /* keep input value the same across both files to avoid dependency on - * handler call order; differentiate by output_weak1 vs output_weak2. 
- */ - set_output_weak(42); - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c deleted file mode 100644 index 0693687474ed..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_maps2.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -/* modifiers and typedefs are ignored when comparing key/value types */ -typedef struct my_key { long x; } key_type; -typedef struct my_value { long x; } value_type; - -extern struct { - __uint(max_entries, 16); - __type(key, key_type); - __type(value, value_type); - __uint(type, BPF_MAP_TYPE_HASH); -} map1 SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, int); - __uint(max_entries, 8); -} map2 SEC(".maps"); - -/* this definition will lose, but it has to exactly match the winner */ -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, int); - __type(value, int); - __uint(max_entries, 16); -} map_weak __weak SEC(".maps"); - -int output_first2; -int output_second2; -int output_weak2; - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler_enter2) -{ - /* update values with key = 2 */ - int key = 2, val = 2; - key_type key_struct = { .x = 2 }; - value_type val_struct = { .x = 2000 }; - - bpf_map_update_elem(&map1, &key_struct, &val_struct, 0); - bpf_map_update_elem(&map2, &key, &val, 0); - bpf_map_update_elem(&map_weak, &key, &val, 0); - - return 0; -} - -SEC("raw_tp/sys_exit") -int BPF_PROG(handler_exit2) -{ - /* lookup values with key = 1, set in another file */ - int key = 1, *val; - key_type key_struct = { .x = 1 }; - value_type *value_struct; - - value_struct = bpf_map_lookup_elem(&map1, &key_struct); - if (value_struct) - output_first2 = value_struct->x; - - val = bpf_map_lookup_elem(&map2, &key); - if (val) - output_second2 = *val; - - val = 
bpf_map_lookup_elem(&map_weak, &key); - if (val) - output_weak2 = *val; - - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c deleted file mode 100644 index ef9e9d0bb0ca..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_vars1.c +++ /dev/null @@ -1,54 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -extern int LINUX_KERNEL_VERSION __kconfig; -/* this weak extern will be strict due to the other file's strong extern */ -extern bool CONFIG_BPF_SYSCALL __kconfig __weak; -extern const void bpf_link_fops __ksym __weak; - -int input_bss1; -int input_data1 = 1; -const volatile int input_rodata1 = 11; - -int input_bss_weak __weak; -/* these two definitions should win */ -int input_data_weak __weak = 10; -const volatile int input_rodata_weak __weak = 100; - -extern int input_bss2; -extern int input_data2; -extern const int input_rodata2; - -int output_bss1; -int output_data1; -int output_rodata1; - -long output_sink1; - -static __noinline int get_bss_res(void) -{ - /* just make sure all the relocations work against .text as well */ - return input_bss1 + input_bss2 + input_bss_weak; -} - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler1) -{ - output_bss1 = get_bss_res(); - output_data1 = input_data1 + input_data2 + input_data_weak; - output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak; - - /* make sure we actually use above special externs, otherwise compiler - * will optimize them out - */ - output_sink1 = LINUX_KERNEL_VERSION - + CONFIG_BPF_SYSCALL - + (long)&bpf_link_fops; - return 0; -} - -char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c deleted file mode 100644 index e4f5bd388a3c..000000000000 --- a/tools/testing/selftests/bpf/progs/linked_vars2.c +++ 
/dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2021 Facebook */ - -#include "vmlinux.h" -#include -#include - -extern int LINUX_KERNEL_VERSION __kconfig; -/* when an extern is defined as both strong and weak, resulting symbol will be strong */ -extern bool CONFIG_BPF_SYSCALL __kconfig; -extern const void __start_BTF __ksym; - -int input_bss2; -int input_data2 = 2; -const volatile int input_rodata2 = 22; - -int input_bss_weak __weak; -/* these two weak variables should lose */ -int input_data_weak __weak = 20; -const volatile int input_rodata_weak __weak = 200; - -extern int input_bss1; -extern int input_data1; -extern const int input_rodata1; - -int output_bss2; -int output_data2; -int output_rodata2; - -int output_sink2; - -static __noinline int get_data_res(void) -{ - /* just make sure all the relocations work against .text as well */ - return input_data1 + input_data2 + input_data_weak; -} - -SEC("raw_tp/sys_enter") -int BPF_PROG(handler2) -{ - output_bss2 = input_bss1 + input_bss2 + input_bss_weak; - output_data2 = get_data_res(); - output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak; - - /* make sure we actually use above special externs, otherwise compiler - * will optimize them out - */ - output_sink2 = LINUX_KERNEL_VERSION - + CONFIG_BPF_SYSCALL - + (long)&__start_BTF; - - return 0; -} - -char LICENSE[] SEC("license") = "GPL";