From 2eda344546caaf9168e778a4007f4609e95106e0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 11 Aug 2008 10:18:39 +0200 Subject: ALSA: hda - Add a new function to seek for a codec ID Gateway notebooks have their ID inside codec vendor ID, not at PCI ID. Due to that, model auto-detection were not possible with the standard seek method. This is what is found at lspci -vnn: 00:14.2 Audio device [0403]: ATI Technologies Inc SB450 HDA Audio [1002:437b] (rev 01) Subsystem: ATI Technologies Inc SB450 HDA Audio [1002:437b] Yet, autodetection is possible, since the codec properly reflects the vendor at the Subsystem ID: $ cat /proc/asound/card0/codec#0 |head -4 Codec: SigmaTel STAC9250 Address: 0 Vendor Id: 0x83847634 Subsystem Id: 0x107b0367 This patch adds a new autodetection function that seeks for codec subsystem ID. Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++ sound/pci/hda/hda_local.h | 3 +++ 2 files changed, 64 insertions(+) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 004344825e9..9c1af0101dd 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2704,6 +2704,67 @@ int snd_hda_check_board_config(struct hda_codec *codec, } EXPORT_SYMBOL_HDA(snd_hda_check_board_config); +/** + * snd_hda_check_board_codec_sid_config - compare the current codec + subsystem ID with the + config table + + This is important for Gateway notebooks with SB450 HDA Audio + where the vendor ID of the PCI device is: + ATI Technologies Inc SB450 HDA Audio [1002:437b] + and the vendor/subvendor are found only at the codec. + + * @codec: the HDA codec + * @num_configs: number of config enums + * @models: array of model name strings + * @tbl: configuration table, terminated by null entries + * + * Compares the modelname or PCI subsystem id of the current codec with the + * given configuration table. 
If a matching entry is found, returns its + * config value (supposed to be 0 or positive). + * + * If no entries are matching, the function returns a negative value. + */ +int snd_hda_check_board_codec_sid_config(struct hda_codec *codec, + int num_configs, const char **models, + const struct snd_pci_quirk *tbl) +{ + const struct snd_pci_quirk *q; + + /* Search for codec ID */ + for (q = tbl; q->subvendor; q++) { + unsigned long vendorid = (q->subdevice) | (q->subvendor << 16); + + if (vendorid == codec->subsystem_id) + break; + } + + if (!q->subvendor) + return -1; + + tbl = q; + + if (tbl->value >= 0 && tbl->value < num_configs) { +#ifdef CONFIG_SND_DEBUG_DETECT + char tmp[10]; + const char *model = NULL; + if (models) + model = models[tbl->value]; + if (!model) { + sprintf(tmp, "#%d", tbl->value); + model = tmp; + } + snd_printdd(KERN_INFO "hda_codec: model '%s' is selected " + "for config %x:%x (%s)\n", + model, tbl->subvendor, tbl->subdevice, + (tbl->name ? tbl->name : "Unknown device")); +#endif + return tbl->value; + } + return -1; +} +EXPORT_SYMBOL_HDA(snd_hda_check_board_codec_sid_config); + /** * snd_hda_add_new_ctls - create controls from the array * @codec: the HDA codec diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 6f2fe0f9fdd..1dd8716c387 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -296,6 +296,9 @@ void snd_print_pcm_bits(int pcm, char *buf, int buflen); int snd_hda_check_board_config(struct hda_codec *codec, int num_configs, const char **modelnames, const struct snd_pci_quirk *pci_list); +int snd_hda_check_board_codec_sid_config(struct hda_codec *codec, + int num_configs, const char **models, + const struct snd_pci_quirk *tbl); int snd_hda_add_new_ctls(struct hda_codec *codec, struct snd_kcontrol_new *knew); -- cgit From 9cb36c2afc298a2f2085ae0071924b7e3f55e72d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 11 Aug 2008 10:18:39 +0200 Subject: ALSA: patch_sigmatel: Add missing 
Gateway entries and autodetection Gateway autodetection and entries are incomplete. This patch adds the entries found at the .INI file for their driver version 5.10.5082.0. It also uses the proper code to seek for notebook ID, since this is based on codec subsystem ID on those devices. This should provide a proper pinup for several gateways notebooks: Gateway M465-E Notebook [Part #1008637] Gateway M465-G Notebook [Part #1008667] Gateway NX260X Notebook [Part #1008794] Gateway NX100X Notebook [Part #1008798] Gateway E-100M Notebook [Part #1008799] Gateway E-100M G Notebook [Part #1008800] Gateway M255-E Notebook [Part #1008801] Gateway M255-G Notebook [Part #1008803] Gateway M285-E Convertible Notebook [Part #1008804] Gateway M285-G Convertible Notebook [Part #1008805] Gateway CX210S Convertible Notebook [Part #1008807] Gateway CX210X Convertible Notebook [Part #1008808] Gateway E-100M SB Notebook [Part #1008973] Gateway M255-E SB Notebook [Part #1008989] Gateway M285-E SB Convertible Notebook [Part #1008990] Gateway M465-E Notebook [Part #1009022] Gateway CX2724 Convertible Notebook [Part #1009036] Gateway MX1025 Notebook [Part #1009046] Gateway CX2720 Convertible Notebook [Part #1009063] Gateway CX2724h Convertible Notebook [Part #1009089] Gateway MX1023 Notebook [Part #1009097] Gateway MX1023h Notebook [Part #1009098] Gateway NX260X Notebook [Part #1009112] Gateway E-100M Notebook [Part #1009126] Gateway MX7533 Notebook [Part #1009146] [Part #1009163] Gateway CX210X Convertible Notebook [Part #1009346] Gateway NX570X Notebook [Part #1009442] Gateway NX570X Notebook [Part #1009448] Gateway NX270S Notebook [Part #1009550] Gateway MX6448 Notebook [Part #1013912R] Gateway MX6453 Notebook [Part #1013913R] Gateway MX6216 Notebook [Part #1013916R] Gateway MX6931 Notebook [Part #1013918R] Gateway CX2726 Convertible Notebook [Part #1013921R] Gateway MP8708 Notebook [Part #1013924R] Gateway MX6446 Notebook [Part #1013927R] Gateway MX6930 Notebook [Part #1013928R] Gateway 
MX6447 Notebook [Part #1013932R] Gateway MX6454 Notebook [Part #1013943R] Gateway MX6439 Notebook [Part #1013947R] [Part #1013955R] [Part #1013971R] Gateway MX6930h Notebook [Part #1013973R] [Part #1013974R] [Part #1013975R] Gateway MX6955 Notebook [Part #1014028R] Gateway MX6956 Notebook [Part #1014033R] Gateway MX6959 Notebook [Part #1014061R] Gateway MX6957 Notebook [Part #1014065R] Gateway MX6960 Notebook [Part #1014068R] Gateway MX6958 Notebook [Part #1014072R] Gateway NX570X Notebook [Part #1014077R] Gateway NX570XL Notebook [Part #1014078R] Gateway NX570QS Notebook [Part #1014079R] Gateway MX6961 Notebook [Part #1014080R] [Part #1014106R] Gateway MX6961h Notebook [Part #1014112R] Gateway NX270S Notebook [Part #1014120R] Gateway MX6431 Notebook [Part #1014121R] Gateway MX8710 Notebook [Part #2905895R] Gateway MX3702 Notebook [Part #2905898R] Blade-K8F GW UMA Single Core Motherboard w/RS485M and 1394 - Quanta (FRU) [Part #4006133R] Since some entries conflict with existing pinups, I'm providing a separate patch to fix those entries. Tested only with Gateway MX6453. 
Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 89 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 81 insertions(+), 8 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 9e07f44ab28..e70ef050530 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -93,7 +93,13 @@ enum { enum { STAC_925x_REF, + STAC_M1, + STAC_M1_2, + STAC_M2, STAC_M2_2, + STAC_M3, + STAC_M5, + STAC_M6, STAC_MA6, STAC_PA6, STAC_925x_MODELS @@ -1600,11 +1606,40 @@ static unsigned int ref925x_pin_configs[8] = { 0x90a70320, 0x02214210, 0x01019020, 0x9033032e, }; +static unsigned int stac925xM1_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, +}; +static unsigned int stac925xM1_2_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, +}; +static unsigned int stac925xM2_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, +}; + +static unsigned int stac925xM3_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x503303f3, +}; +static unsigned int stac925xM5_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, +}; + +/* Original M6 entry at .ini file */ +static unsigned int stac925xM6_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x90330320, +}; + static unsigned int stac925x_MA6_pin_configs[8] = { 0x40c003f0, 0x424503f2, 0x01813022, 0x02a19021, 0x90a70320, 0x90100211, 0x400003f1, 0x9033032e, }; +/* This entry should be M2 */ static unsigned int stac925x_PA6_pin_configs[8] = { 0x40c003f0, 0x424503f2, 0x01813022, 0x02a19021, 0x50a103f0, 0x90100211, 0x400003f1, 0x9033032e, @@ -1617,27 +1652,55 
@@ static unsigned int stac925xM2_2_pin_configs[8] = { static unsigned int *stac925x_brd_tbl[STAC_925x_MODELS] = { [STAC_REF] = ref925x_pin_configs, + [STAC_M1] = stac925xM1_pin_configs, + [STAC_M1_2] = stac925xM1_2_pin_configs, + [STAC_M2] = stac925xM2_pin_configs, [STAC_M2_2] = stac925xM2_2_pin_configs, + [STAC_M3] = stac925xM3_pin_configs, + [STAC_M5] = stac925xM5_pin_configs, + [STAC_M6] = stac925xM6_pin_configs, [STAC_MA6] = stac925x_MA6_pin_configs, [STAC_PA6] = stac925x_PA6_pin_configs, }; static const char *stac925x_models[STAC_925x_MODELS] = { [STAC_REF] = "ref", + [STAC_M1] = "m1", + [STAC_M1_2] = "m1-2", + [STAC_M2] = "m2", [STAC_M2_2] = "m2-2", - [STAC_MA6] = "m6", + [STAC_M3] = "m3", + [STAC_M5] = "m5", + [STAC_M6] = "m6", + [STAC_MA6] = "ma6", [STAC_PA6] = "pa6", }; -static struct snd_pci_quirk stac925x_cfg_tbl[] = { - /* SigmaTel reference board */ - SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668, "DFI LanParty", STAC_REF), - SND_PCI_QUIRK(0x8384, 0x7632, "Stac9202 Reference Board", STAC_REF), +static struct snd_pci_quirk stac925x_codec_id_cfg_tbl[] = { SND_PCI_QUIRK(0x107b, 0x0316, "Gateway M255", STAC_REF), SND_PCI_QUIRK(0x107b, 0x0366, "Gateway MP6954", STAC_REF), + SND_PCI_QUIRK(0x107b, 0x0461, "Gateway NX560XL", STAC_MA6), SND_PCI_QUIRK(0x107b, 0x0681, "Gateway NX860", STAC_PA6), - SND_PCI_QUIRK(0x1002, 0x437b, "Gateway MX6453", STAC_M2_2), + SND_PCI_QUIRK(0x107b, 0x0367, "Gateway MX6453", STAC_M1_2), + + /* Not sure about the brand name for those */ + SND_PCI_QUIRK(0x107b, 0x0281, "Gateway mobile", STAC_M1), + SND_PCI_QUIRK(0x107b, 0x0507, "Gateway mobile", STAC_M3), + SND_PCI_QUIRK(0x107b, 0x0281, "Gateway mobile", STAC_M6), + SND_PCI_QUIRK(0x107b, 0x0685, "Gateway mobile", STAC_M2_2), + + {} /* terminator */ +}; + +static struct snd_pci_quirk stac925x_cfg_tbl[] = { + /* SigmaTel reference board */ + SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668, "DFI LanParty", STAC_REF), + SND_PCI_QUIRK(0x8384, 0x7632, "Stac9202 Reference Board", STAC_REF), + + /* 
Default table for unknown ID */ + SND_PCI_QUIRK(0x1002, 0x437b, "Gateway mobile", STAC_M2_2), + {} /* terminator */ }; @@ -4371,12 +4434,22 @@ static int patch_stac925x(struct hda_codec *codec) codec->spec = spec; spec->num_pins = ARRAY_SIZE(stac925x_pin_nids); spec->pin_nids = stac925x_pin_nids; - spec->board_config = snd_hda_check_board_config(codec, STAC_925x_MODELS, + + /* Check first for codec ID */ + spec->board_config = snd_hda_check_board_codec_sid_config(codec, + STAC_925x_MODELS, + stac925x_models, + stac925x_codec_id_cfg_tbl); + + /* Now checks for PCI ID, if codec ID is not found */ + if (spec->board_config < 0) + spec->board_config = snd_hda_check_board_config(codec, + STAC_925x_MODELS, stac925x_models, stac925x_cfg_tbl); again: if (spec->board_config < 0) { - snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC925x," + snd_printdd(KERN_INFO "hda_codec: Unknown model for STAC925x," "using BIOS defaults\n"); err = stac92xx_save_bios_config_regs(codec); } else -- cgit From 58eec4235d63e07b98544527e031e7ae807e15df Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 11 Aug 2008 10:18:39 +0200 Subject: ALSA: hda - More fixes on Gateway entries Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 72 +++++++++++++++++++----------------------- 1 file changed, 33 insertions(+), 39 deletions(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index e70ef050530..c05d4643afd 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -55,7 +55,8 @@ enum { STAC_9200_DELL_M25, STAC_9200_DELL_M26, STAC_9200_DELL_M27, - STAC_9200_GATEWAY, + STAC_9200_M4, + STAC_9200_M4_2, STAC_9200_PANASONIC, STAC_9200_MODELS }; @@ -100,8 +101,6 @@ enum { STAC_M3, STAC_M5, STAC_M6, - STAC_MA6, - STAC_PA6, STAC_925x_MODELS }; @@ -1375,7 +1374,16 @@ static unsigned int ref9200_pin_configs[8] = { 0x02a19020, 0x01a19021, 0x90100140, 0x01813122, }; -/* +static unsigned int 
gateway9200_m4_pin_configs[8] = { + 0x400000fe, 0x404500f4, 0x400100f0, 0x90110010, + 0x400100f1, 0x02a1902e, 0x500000f2, 0x500000f3, +}; +static unsigned int gateway9200_m4_2_pin_configs[8] = { + 0x400000fe, 0x404500f4, 0x400100f0, 0x90110010, + 0x400100f1, 0x02a1902e, 0x500000f2, 0x500000f3, +}; + +/* STAC 9200 pin configs for 102801A8 102801DE @@ -1505,6 +1513,8 @@ static unsigned int *stac9200_brd_tbl[STAC_9200_MODELS] = { [STAC_9200_DELL_M25] = dell9200_m25_pin_configs, [STAC_9200_DELL_M26] = dell9200_m26_pin_configs, [STAC_9200_DELL_M27] = dell9200_m27_pin_configs, + [STAC_9200_M4] = gateway9200_m4_pin_configs, + [STAC_9200_M4_2] = gateway9200_m4_2_pin_configs, [STAC_9200_PANASONIC] = ref9200_pin_configs, }; @@ -1521,7 +1531,8 @@ static const char *stac9200_models[STAC_9200_MODELS] = { [STAC_9200_DELL_M25] = "dell-m25", [STAC_9200_DELL_M26] = "dell-m26", [STAC_9200_DELL_M27] = "dell-m27", - [STAC_9200_GATEWAY] = "gateway", + [STAC_9200_M4] = "gateway-m4", + [STAC_9200_M4_2] = "gateway-m4-2", [STAC_9200_PANASONIC] = "panasonic", }; @@ -1591,11 +1602,9 @@ static struct snd_pci_quirk stac9200_cfg_tbl[] = { /* Panasonic */ SND_PCI_QUIRK(0x10f7, 0x8338, "Panasonic CF-74", STAC_9200_PANASONIC), /* Gateway machines needs EAPD to be set on resume */ - SND_PCI_QUIRK(0x107b, 0x0205, "Gateway S-7110M", STAC_9200_GATEWAY), - SND_PCI_QUIRK(0x107b, 0x0317, "Gateway MT3423, MX341*", - STAC_9200_GATEWAY), - SND_PCI_QUIRK(0x107b, 0x0318, "Gateway ML3019, MT3707", - STAC_9200_GATEWAY), + SND_PCI_QUIRK(0x107b, 0x0205, "Gateway S-7110M", STAC_9200_M4), + SND_PCI_QUIRK(0x107b, 0x0317, "Gateway MT3423, MX341*", STAC_9200_M4_2), + SND_PCI_QUIRK(0x107b, 0x0318, "Gateway ML3019, MT3707", STAC_9200_M4_2), /* OQO Mobile */ SND_PCI_QUIRK(0x1106, 0x3288, "OQO Model 2", STAC_9200_OQO), {} /* terminator */ @@ -1610,46 +1619,37 @@ static unsigned int stac925xM1_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, }; + static 
unsigned int stac925xM1_2_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, }; + static unsigned int stac925xM2_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, }; +static unsigned int stac925xM2_2_pin_configs[8] = { + 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, + 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, +}; + static unsigned int stac925xM3_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x503303f3, }; + static unsigned int stac925xM5_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x9033032e, }; -/* Original M6 entry at .ini file */ static unsigned int stac925xM6_pin_configs[8] = { 0x40c003f4, 0x424503f2, 0x400000f3, 0x02a19020, 0x40a000f0, 0x90100210, 0x400003f1, 0x90330320, }; -static unsigned int stac925x_MA6_pin_configs[8] = { - 0x40c003f0, 0x424503f2, 0x01813022, 0x02a19021, - 0x90a70320, 0x90100211, 0x400003f1, 0x9033032e, -}; - -/* This entry should be M2 */ -static unsigned int stac925x_PA6_pin_configs[8] = { - 0x40c003f0, 0x424503f2, 0x01813022, 0x02a19021, - 0x50a103f0, 0x90100211, 0x400003f1, 0x9033032e, -}; - -static unsigned int stac925xM2_2_pin_configs[8] = { - 0x40c003f3, 0x424503f2, 0x04180011, 0x02a19020, - 0x50a103f0, 0x90100212, 0x400003f1, 0x9033032e, -}; - static unsigned int *stac925x_brd_tbl[STAC_925x_MODELS] = { [STAC_REF] = ref925x_pin_configs, [STAC_M1] = stac925xM1_pin_configs, @@ -1659,8 +1659,6 @@ static unsigned int *stac925x_brd_tbl[STAC_925x_MODELS] = { [STAC_M3] = stac925xM3_pin_configs, [STAC_M5] = stac925xM5_pin_configs, [STAC_M6] = stac925xM6_pin_configs, - [STAC_MA6] = stac925x_MA6_pin_configs, - [STAC_PA6] = stac925x_PA6_pin_configs, }; static const char *stac925x_models[STAC_925x_MODELS] = { @@ -1672,24 +1670,19 @@ static const char *stac925x_models[STAC_925x_MODELS] = { 
[STAC_M3] = "m3", [STAC_M5] = "m5", [STAC_M6] = "m6", - [STAC_MA6] = "ma6", - [STAC_PA6] = "pa6", }; static struct snd_pci_quirk stac925x_codec_id_cfg_tbl[] = { - SND_PCI_QUIRK(0x107b, 0x0316, "Gateway M255", STAC_REF), - SND_PCI_QUIRK(0x107b, 0x0366, "Gateway MP6954", STAC_REF), - - SND_PCI_QUIRK(0x107b, 0x0461, "Gateway NX560XL", STAC_MA6), - SND_PCI_QUIRK(0x107b, 0x0681, "Gateway NX860", STAC_PA6), + SND_PCI_QUIRK(0x107b, 0x0316, "Gateway M255", STAC_M2), + SND_PCI_QUIRK(0x107b, 0x0366, "Gateway MP6954", STAC_M5), + SND_PCI_QUIRK(0x107b, 0x0461, "Gateway NX560XL", STAC_M1), + SND_PCI_QUIRK(0x107b, 0x0681, "Gateway NX860", STAC_M2), SND_PCI_QUIRK(0x107b, 0x0367, "Gateway MX6453", STAC_M1_2), - /* Not sure about the brand name for those */ SND_PCI_QUIRK(0x107b, 0x0281, "Gateway mobile", STAC_M1), SND_PCI_QUIRK(0x107b, 0x0507, "Gateway mobile", STAC_M3), SND_PCI_QUIRK(0x107b, 0x0281, "Gateway mobile", STAC_M6), SND_PCI_QUIRK(0x107b, 0x0685, "Gateway mobile", STAC_M2_2), - {} /* terminator */ }; @@ -4399,7 +4392,8 @@ static int patch_stac9200(struct hda_codec *codec) spec->num_adcs = 1; spec->num_pwrs = 0; - if (spec->board_config == STAC_9200_GATEWAY || + if (spec->board_config == STAC_9200_M4 || + spec->board_config == STAC_9200_M4_2 || spec->board_config == STAC_9200_OQO) spec->init = stac9200_eapd_init; else -- cgit From 1433fa7d8da608f59df1f6f381da00958bf5a0b7 Mon Sep 17 00:00:00 2001 From: Jason Jin Date: Thu, 4 Dec 2008 19:14:28 +0800 Subject: powerpc: Fix the ide suspend function in uli1575 The general pci resume code can only restore part of the configuration registers. We need to reconfigure those registers in the FIXUP_RESUME. 
Signed-off-by: Jason Jin Signed-off-by: Kumar Gala --- arch/powerpc/platforms/fsl_uli1575.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/fsl_uli1575.c b/arch/powerpc/platforms/fsl_uli1575.c index 8c619963bec..1db6b9e037f 100644 --- a/arch/powerpc/platforms/fsl_uli1575.c +++ b/arch/powerpc/platforms/fsl_uli1575.c @@ -249,6 +249,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5288, quirk_uli5288); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x5249, quirk_final_uli5249); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AL, 0x1575, quirk_final_uli1575); +DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_AL, 0x5229, quirk_uli5229); static void __devinit hpcd_quirk_uli1575(struct pci_dev *dev) { -- cgit From be122d6d8b836fec329d740e75bfbdffa96ff6ff Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 6 Jan 2009 10:23:37 -0600 Subject: powerpc/85xx: Fix PCIe error interrupts The PCIe interrupts for 8544ds and 8572ds were incorrect. The 8572 case was found by Liu Yu. 
Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/mpc8544ds.dts | 4 ++-- arch/powerpc/boot/dts/mpc8572ds.dts | 4 ++-- arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts | 2 +- arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8544ds.dts b/arch/powerpc/boot/dts/mpc8544ds.dts index b9da4210506..0668d104877 100644 --- a/arch/powerpc/boot/dts/mpc8544ds.dts +++ b/arch/powerpc/boot/dts/mpc8544ds.dts @@ -313,7 +313,7 @@ 0x1000000 0x0 0x0 0xe1010000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupts = <25 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ @@ -350,7 +350,7 @@ 0x1000000 0x0 0x0 0xe1020000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <25 2>; + interrupts = <26 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/mpc8572ds.dts b/arch/powerpc/boot/dts/mpc8572ds.dts index 21459e161d0..3dcc001b8ed 100644 --- a/arch/powerpc/boot/dts/mpc8572ds.dts +++ b/arch/powerpc/boot/dts/mpc8572ds.dts @@ -724,7 +724,7 @@ 0x1000000 0x0 0x00000000 0 0xffc10000 0x0 0x00010000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupts = <25 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ @@ -761,7 +761,7 @@ 0x1000000 0x0 0x00000000 0 0xffc20000 0x0 0x00010000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <27 2>; + interrupts = <26 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts b/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts index c114c4ee993..fd462efa9e6 100644 --- a/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts +++ b/arch/powerpc/boot/dts/mpc8572ds_camp_core0.dts @@ -457,7 +457,7 @@ 0x1000000 0x0 0x0 0xffc10000 
0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <26 2>; + interrupts = <25 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ diff --git a/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts b/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts index 04ecda18d20..e35230f2ac9 100644 --- a/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts +++ b/arch/powerpc/boot/dts/mpc8572ds_camp_core1.dts @@ -208,7 +208,7 @@ 0x1000000 0x0 0x0 0xffc20000 0x0 0x10000>; clock-frequency = <33333333>; interrupt-parent = <&mpic>; - interrupts = <27 2>; + interrupts = <26 2>; interrupt-map-mask = <0xf800 0x0 0x0 0x7>; interrupt-map = < /* IDSEL 0x0 */ -- cgit From fdd4e8152f5f0d6d500b35515265e425acdfd203 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 6 Jan 2009 17:12:23 -0600 Subject: powerpc/qe: add Ethernet UPSMR definitions to QE library Add the UCC_GETH_UPSMR_xxx definitions to qe.h. The ucc_geth driver will eventually use these instead of the UPSMR_ macros it currently defines. 
Signed-off-by: Timur Tabi Signed-off-by: Kumar Gala --- arch/powerpc/include/asm/qe.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h index a0a15311d0d..2701753d993 100644 --- a/arch/powerpc/include/asm/qe.h +++ b/arch/powerpc/include/asm/qe.h @@ -624,7 +624,7 @@ struct ucc_slow_pram { #define UCC_GETH_UCCE_RXF1 0x00000002 #define UCC_GETH_UCCE_RXF0 0x00000001 -/* UPSMR, when used as a UART */ +/* UCC Protocol Specific Mode Register (UPSMR), when used for UART */ #define UCC_UART_UPSMR_FLC 0x8000 #define UCC_UART_UPSMR_SL 0x4000 #define UCC_UART_UPSMR_CL_MASK 0x3000 @@ -652,6 +652,23 @@ struct ucc_slow_pram { #define UCC_UART_UPSMR_TPM_EVEN 0x0002 #define UCC_UART_UPSMR_TPM_HIGH 0x0003 +/* UCC Protocol Specific Mode Register (UPSMR), when used for Ethernet */ +#define UCC_GETH_UPSMR_FTFE 0x80000000 +#define UCC_GETH_UPSMR_PTPE 0x40000000 +#define UCC_GETH_UPSMR_ECM 0x04000000 +#define UCC_GETH_UPSMR_HSE 0x02000000 +#define UCC_GETH_UPSMR_PRO 0x00400000 +#define UCC_GETH_UPSMR_CAP 0x00200000 +#define UCC_GETH_UPSMR_RSH 0x00100000 +#define UCC_GETH_UPSMR_RPM 0x00080000 +#define UCC_GETH_UPSMR_R10M 0x00040000 +#define UCC_GETH_UPSMR_RLPB 0x00020000 +#define UCC_GETH_UPSMR_TBIM 0x00010000 +#define UCC_GETH_UPSMR_RES1 0x00002000 +#define UCC_GETH_UPSMR_RMM 0x00001000 +#define UCC_GETH_UPSMR_CAM 0x00000400 +#define UCC_GETH_UPSMR_BRO 0x00000200 + /* UCC Transmit On Demand Register (UTODR) */ #define UCC_SLOW_TOD 0x8000 #define UCC_FAST_TOD 0x8000 -- cgit From a097a78c1e6e4030fcef3bcab6351b6001662335 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Tue, 6 Jan 2009 22:37:53 -0600 Subject: powerpc/fsl-pci: Better ATMU setup for 85xx/86xx The code that sets up the outbound ATMU windows, which is used to map CPU physical addresses into PCI bus addresses where BARs will be mapped, didn't work so well. For one, it leaked the ioremap() of the ATMU registers. 
Another small bug was that the high 20 bits of the PCI bus address were left as zero. It's legal for prefetchable memory regions to be above 32 bits, so the high 20 bits might not be zero. Mainly, it couldn't handle ranges that were not a power of two in size or were not naturally aligned. The ATMU windows have these requirements (size & alignment), but the code didn't bother to check if the ranges it was programming met them. If they didn't, the windows would silently be programmed incorrectly. This new code can handle ranges which are not power of two sized nor naturally aligned. It simply splits the ranges into multiple valid ATMU windows. As there are only four windows, poorly aligned or sized ranges (which didn't even work before) may run out of windows. In this case an error is printed and an effort is made to disable the unmapped resources. An improvement that could be made would be to make use of the default outbound window. Iff hose->pci_mem_offset is zero, then it's possible that some or all of the ranges might not need an outbound window and could just use the default window. The default ATMU window can support a pci_mem_offset less than zero too, but pci_mem_offset is unsigned. One could say the abilities allowed a powerpc pci_controller are neither a subset nor a superset of the abilities of a Freescale PCIe controller. Thankfully, the most useful bits are in the intersection of the two abilities. 
Signed-off-by: Trent Piepho Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 104 ++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index f611d0369cc..44bc903ead7 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -28,63 +28,101 @@ #include #if defined(CONFIG_PPC_85xx) || defined(CONFIG_PPC_86xx) +static int __init setup_one_atmu(struct ccsr_pci __iomem *pci, + unsigned int index, const struct resource *res, + resource_size_t offset) +{ + resource_size_t pci_addr = res->start - offset; + resource_size_t phys_addr = res->start; + resource_size_t size = res->end - res->start + 1; + u32 flags = 0x80044000; /* enable & mem R/W */ + unsigned int i; + + pr_debug("PCI MEM resource start 0x%016llx, size 0x%016llx.\n", + (u64)res->start, (u64)size); + + for (i = 0; size > 0; i++) { + unsigned int bits = min(__ilog2(size), + __ffs(pci_addr | phys_addr)); + + if (index + i >= 5) + return -1; + + out_be32(&pci->pow[index + i].potar, pci_addr >> 12); + out_be32(&pci->pow[index + i].potear, (u64)pci_addr >> 44); + out_be32(&pci->pow[index + i].powbar, phys_addr >> 12); + out_be32(&pci->pow[index + i].powar, flags | (bits - 1)); + + pci_addr += (resource_size_t)1U << bits; + phys_addr += (resource_size_t)1U << bits; + size -= (resource_size_t)1U << bits; + } + + return i; +} + /* atmu setup for fsl pci/pcie controller */ static void __init setup_pci_atmu(struct pci_controller *hose, struct resource *rsrc) { struct ccsr_pci __iomem *pci; - int i; + int i, j, n; pr_debug("PCI memory map start 0x%016llx, size 0x%016llx\n", (u64)rsrc->start, (u64)rsrc->end - (u64)rsrc->start + 1); pci = ioremap(rsrc->start, rsrc->end - rsrc->start + 1); + if (!pci) { + dev_err(hose->parent, "Unable to map ATMU registers\n"); + return; + } - /* Disable all windows (except powar0 since its ignored) */ + /* Disable all windows (except powar0 
since it's ignored) */ for(i = 1; i < 5; i++) out_be32(&pci->pow[i].powar, 0); for(i = 0; i < 3; i++) out_be32(&pci->piw[i].piwar, 0); /* Setup outbound MEM window */ - for(i = 0; i < 3; i++) - if (hose->mem_resources[i].flags & IORESOURCE_MEM){ - resource_size_t pci_addr_start = - hose->mem_resources[i].start - - hose->pci_mem_offset; - pr_debug("PCI MEM resource start 0x%016llx, size 0x%016llx.\n", - (u64)hose->mem_resources[i].start, - (u64)hose->mem_resources[i].end - - (u64)hose->mem_resources[i].start + 1); - out_be32(&pci->pow[i+1].potar, (pci_addr_start >> 12)); - out_be32(&pci->pow[i+1].potear, 0); - out_be32(&pci->pow[i+1].powbar, - (hose->mem_resources[i].start >> 12)); - /* Enable, Mem R/W */ - out_be32(&pci->pow[i+1].powar, 0x80044000 - | (__ilog2(hose->mem_resources[i].end - - hose->mem_resources[i].start + 1) - 1)); - } + for(i = 0, j = 1; i < 3; i++) { + if (!(hose->mem_resources[i].flags & IORESOURCE_MEM)) + continue; + + n = setup_one_atmu(pci, j, &hose->mem_resources[i], + hose->pci_mem_offset); + + if (n < 0 || j >= 5) { + pr_err("Ran out of outbound PCI ATMUs for resource %d!\n", i); + hose->mem_resources[i].flags |= IORESOURCE_DISABLED; + } else + j += n; + } /* Setup outbound IO window */ - if (hose->io_resource.flags & IORESOURCE_IO){ - pr_debug("PCI IO resource start 0x%016llx, size 0x%016llx, " - "phy base 0x%016llx.\n", - (u64)hose->io_resource.start, - (u64)hose->io_resource.end - (u64)hose->io_resource.start + 1, - (u64)hose->io_base_phys); - out_be32(&pci->pow[i+1].potar, (hose->io_resource.start >> 12)); - out_be32(&pci->pow[i+1].potear, 0); - out_be32(&pci->pow[i+1].powbar, (hose->io_base_phys >> 12)); - /* Enable, IO R/W */ - out_be32(&pci->pow[i+1].powar, 0x80088000 - | (__ilog2(hose->io_resource.end - - hose->io_resource.start + 1) - 1)); + if (hose->io_resource.flags & IORESOURCE_IO) { + if (j >= 5) { + pr_err("Ran out of outbound PCI ATMUs for IO resource\n"); + } else { + pr_debug("PCI IO resource start 0x%016llx, size 
0x%016llx, " + "phy base 0x%016llx.\n", + (u64)hose->io_resource.start, + (u64)hose->io_resource.end - (u64)hose->io_resource.start + 1, + (u64)hose->io_base_phys); + out_be32(&pci->pow[j].potar, (hose->io_resource.start >> 12)); + out_be32(&pci->pow[j].potear, 0); + out_be32(&pci->pow[j].powbar, (hose->io_base_phys >> 12)); + /* Enable, IO R/W */ + out_be32(&pci->pow[j].powar, 0x80088000 + | (__ilog2(hose->io_resource.end + - hose->io_resource.start + 1) - 1)); + } } /* Setup 2G inbound Memory Window @ 1 */ out_be32(&pci->piw[2].pitar, 0x00000000); out_be32(&pci->piw[2].piwbar,0x00000000); out_be32(&pci->piw[2].piwar, PIWAR_2G); + + iounmap(pci); } static void __init setup_pci_cmd(struct pci_controller *hose) -- cgit From 565f37642c78754a85efe6c20a4a15e18ed21f07 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Wed, 17 Dec 2008 11:43:26 -0800 Subject: powerpc/fsl-pci: Set relaxed ordering on prefetchable ranges Provides a small speedup when accessing prefetchable ranges. To indicate that a memory range is prefetchable, mark it in the dts file with 42000000 instead of 02000000. A powerpc pci_controller is allowed three memory ranges, any of which may be prefetchable. However, the PCI-PCI bridge configuration space only has one field for "non-prefetchable memory behind bridge", which has a 32 bit address, and one field for "prefetchable memory behind bridge", which may have a 64 bit address. These are PCI bus addresses, not CPU physical addresses. So really you're only allowed one memory range of each type. And if you want the range at a PCI address above 32 bits you must make it prefetchable. 
Signed-off-by: Trent Piepho Signed-off-by: Kumar Gala --- arch/powerpc/sysdev/fsl_pci.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 44bc903ead7..9817f63723d 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -41,6 +41,9 @@ static int __init setup_one_atmu(struct ccsr_pci __iomem *pci, pr_debug("PCI MEM resource start 0x%016llx, size 0x%016llx.\n", (u64)res->start, (u64)size); + if (res->flags & IORESOURCE_PREFETCH) + flags |= 0x10000000; /* enable relaxed ordering */ + for (i = 0; size > 0; i++) { unsigned int bits = min(__ilog2(size), __ffs(pci_addr | phys_addr)); -- cgit From 19f5465e823858a2f0b0e9a92e52816ba3ee70bb Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 8 Dec 2008 19:34:55 -0800 Subject: powerpc/fsl-booke: Don't hard-code size of struct tlbcam Some assembly code in head_fsl_booke.S hard-coded the size of struct tlbcam to 20 when it indexed the TLBCAM table. Anyone changing the size of struct tlbcam would not know to expect that. The kernel already has a system to get the size of C structures into assembly language files, asm-offsets, so let's use it. The definition of the struct gets moved to a header, so that asm-offsets.c can include it. 
Signed-off-by: Trent Piepho Signed-off-by: Kumar Gala --- arch/powerpc/kernel/asm-offsets.c | 7 +++++++ arch/powerpc/kernel/head_fsl_booke.S | 2 +- arch/powerpc/mm/fsl_booke_mmu.c | 8 +------- arch/powerpc/mm/mmu_decl.h | 9 +++++++++ 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 661d07d2146..06958da94f1 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -56,6 +56,10 @@ #include "head_booke.h" #endif +#if defined(CONFIG_FSL_BOOKE) +#include "../mm/mmu_decl.h" +#endif + int main(void) { DEFINE(THREAD, offsetof(struct task_struct, thread)); @@ -384,6 +388,9 @@ int main(void) DEFINE(PGD_T_LOG2, PGD_T_LOG2); DEFINE(PTE_T_LOG2, PTE_T_LOG2); #endif +#ifdef CONFIG_FSL_BOOKE + DEFINE(TLBCAM_SIZE, sizeof(struct tlbcam)); +#endif return 0; } diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 11b549acc03..805f28d30e4 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -909,7 +909,7 @@ KernelSPE: _GLOBAL(loadcam_entry) lis r4,TLBCAM@ha addi r4,r4,TLBCAM@l - mulli r5,r3,20 + mulli r5,r3,TLBCAM_SIZE add r3,r5,r4 lwz r4,0(r3) mtspr SPRN_MAS0,r4 diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index 23cee39534f..c9ee59af100 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -61,13 +61,7 @@ static unsigned long __cam0, __cam1, __cam2; #define NUM_TLBCAMS (16) -struct tlbcam { - u32 MAS0; - u32 MAS1; - u32 MAS2; - u32 MAS3; - u32 MAS7; -} TLBCAM[NUM_TLBCAMS]; +struct tlbcam TLBCAM[NUM_TLBCAMS]; struct tlbcamrange { unsigned long start; diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 4314b39b6fa..6f6ee62c2a0 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -75,6 +75,15 @@ extern void _tlbia(void); #endif /* CONFIG_PPC_MMU_NOHASH */ #ifdef CONFIG_PPC32 
+ +struct tlbcam { + u32 MAS0; + u32 MAS1; + u32 MAS2; + u32 MAS3; + u32 MAS7; +}; + extern void mapin_ram(void); extern int map_page(unsigned long va, phys_addr_t pa, int flags); extern void setbat(int index, unsigned long virt, phys_addr_t phys, -- cgit From 6fd8be4bf72879b3039654388e985cabf8449af5 Mon Sep 17 00:00:00 2001 From: Trent Piepho Date: Mon, 8 Dec 2008 19:34:56 -0800 Subject: powerpc/fsl-booke: Remove num_tlbcam_entries This is a global variable defined in fsl_booke_mmu.c with a value that gets initialized in assembly code in head_fsl_booke.S. It's never used. If some code ever does want to know the number of entries in TLB1, then "numcams = mfspr(SPRN_TLB1CFG) & 0xfff", is a whole lot simpler than a global initialized during kernel boot from assembly. Signed-off-by: Trent Piepho Signed-off-by: Kumar Gala --- arch/powerpc/kernel/head_fsl_booke.S | 4 ---- arch/powerpc/mm/fsl_booke_mmu.c | 1 - arch/powerpc/mm/mmu_decl.h | 2 -- 3 files changed, 7 deletions(-) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 805f28d30e4..2f32720a44a 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -389,10 +389,6 @@ skpinv: addi r6,r6,1 /* Increment */ #endif #endif - mfspr r3,SPRN_TLB1CFG - andi. r3,r3,0xfff - lis r4,num_tlbcam_entries@ha - stw r3,num_tlbcam_entries@l(r4) /* * Decide what sort of machine this is and initialize the MMU. 
*/ diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c index c9ee59af100..1971e4ee3d6 100644 --- a/arch/powerpc/mm/fsl_booke_mmu.c +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -56,7 +56,6 @@ extern void loadcam_entry(unsigned int index); unsigned int tlbcam_index; -unsigned int num_tlbcam_entries; static unsigned long __cam0, __cam1, __cam2; #define NUM_TLBCAMS (16) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 6f6ee62c2a0..d0bb69dc627 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -99,8 +99,6 @@ extern unsigned int rtas_data, rtas_size; struct hash_pte; extern struct hash_pte *Hash, *Hash_end; extern unsigned long Hash_size, Hash_mask; - -extern unsigned int num_tlbcam_entries; #endif extern unsigned long ioremap_bot; -- cgit From c9a98553d513dfc82cdce869970d5662c1f22c68 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 1 Jan 2009 14:21:16 -0500 Subject: [XFS] pass XFS_IGET_BULKSTAT to xfs_iget for handle operations NFS clients or users of the handle ioctls can pass us arbitrary inode numbers through the exportfs interface. Make sure we use the XFS_IGET_BULKSTAT so that these don't cause shutdowns due to the corruption checks. Also translate the EINVAL we get back for invalid inode clusters into an ESTALE which is more appropriate, and remove the useless check for a NULL inode on a successful xfs_iget return. I have a testcase to reproduce this using the handle interface which I will submit to xfsqa. 
Reported-by: Mario Becroft Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_export.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 595751f7835..87b8cbd23d4 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -126,11 +126,26 @@ xfs_nfs_get_inode( if (ino == 0) return ERR_PTR(-ESTALE); - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) + /* + * The XFS_IGET_BULKSTAT means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem. Because + * clients can send any kind of invalid file handle, e.g. after + * a restore on the server we have to deal with this case gracefully. + */ + error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, + XFS_ILOCK_SHARED, &ip, 0); + if (error) { + /* + * EINVAL means the inode cluster doesn't exist anymore. + * This implies the filehandle is stale, so we should + * translate it here. + * We don't use ESTALE directly down the chain to not + * confuse applications using bulkstat that expect EINVAL. + */ + if (error == EINVAL) + error = ESTALE; return ERR_PTR(-error); - if (!ip) - return ERR_PTR(-EIO); + } if (ip->i_d.di_gen != generation) { xfs_iput_new(ip, XFS_ILOCK_SHARED); -- cgit From 9800b550355e99c9bcaba7ec6540751dce0823d7 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 1 Jan 2009 16:40:10 -0600 Subject: [XFS] Remove several unused typedefs. 
Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_aops.h | 2 -- fs/xfs/xfs_acl.h | 1 - fs/xfs/xfs_types.h | 2 -- 3 files changed, 5 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 7b26f5ff969..1dd52884975 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -21,8 +21,6 @@ extern struct workqueue_struct *xfsdatad_workqueue; extern mempool_t *xfs_ioend_pool; -typedef void (*xfs_ioend_func_t)(void *); - /* * xfs_ioend struct manages large extent writes for XFS. * It can manage several multi-page bio's at once. diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index a4e293b93ef..642f1db4def 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -22,7 +22,6 @@ * Access Control Lists */ typedef __uint16_t xfs_acl_perm_t; -typedef __int32_t xfs_acl_type_t; typedef __int32_t xfs_acl_tag_t; typedef __int32_t xfs_acl_id_t; diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 0f5191644ab..baedbd14dc2 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -111,8 +111,6 @@ typedef __uint64_t xfs_fileoff_t; /* block number in a file */ typedef __int64_t xfs_sfiloff_t; /* signed block number in a file */ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */ -typedef __uint8_t xfs_arch_t; /* architecture of an xfs fs */ - /* * Null values for the types. 
*/ -- cgit From c9fb86a917640d66ba2e0613a12f3a76eda8a30f Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Thu, 1 Jan 2009 16:40:11 -0600 Subject: [XFS] Remove macro-to-function indirections in attr code Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_attr_leaf.c | 72 +++++++++++++++++++++++++------------------------- fs/xfs/xfs_attr_leaf.h | 12 --------- 2 files changed, 36 insertions(+), 48 deletions(-) diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 79da6b2ea99..6c323f8a4cd 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -736,7 +736,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp) continue; /* don't copy partial entries */ if (!(entry->flags & XFS_ATTR_LOCAL)) return(0); - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); + name_loc = xfs_attr_leaf_name_local(leaf, i); if (name_loc->namelen >= XFS_ATTR_SF_ENTSIZE_MAX) return(0); if (be16_to_cpu(name_loc->valuelen) >= XFS_ATTR_SF_ENTSIZE_MAX) @@ -823,7 +823,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) if (!entry->nameidx) continue; ASSERT(entry->flags & XFS_ATTR_LOCAL); - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); + name_loc = xfs_attr_leaf_name_local(leaf, i); nargs.name = (char *)name_loc->nameval; nargs.namelen = name_loc->namelen; nargs.value = (char *)&name_loc->nameval[nargs.namelen]; @@ -1141,14 +1141,14 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) * as part of this transaction (a split operation for example). 
*/ if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); + name_loc = xfs_attr_leaf_name_local(leaf, args->index); name_loc->namelen = args->namelen; name_loc->valuelen = cpu_to_be16(args->valuelen); memcpy((char *)name_loc->nameval, args->name, args->namelen); memcpy((char *)&name_loc->nameval[args->namelen], args->value, be16_to_cpu(name_loc->valuelen)); } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->namelen = args->namelen; memcpy((char *)name_rmt->name, args->name, args->namelen); entry->flags |= XFS_ATTR_INCOMPLETE; @@ -1159,7 +1159,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen); } xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), + XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), xfs_attr_leaf_entsize(leaf, args->index))); /* @@ -1749,10 +1749,10 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args) /* * Compress the remaining entries and zero out the removed stuff. 
*/ - memset(XFS_ATTR_LEAF_NAME(leaf, args->index), 0, entsize); + memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize); be16_add_cpu(&hdr->usedbytes, -entsize); xfs_da_log_buf(args->trans, bp, - XFS_DA_LOGRANGE(leaf, XFS_ATTR_LEAF_NAME(leaf, args->index), + XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index), entsize)); tmp = (be16_to_cpu(hdr->count) - args->index) @@ -1985,7 +1985,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) continue; } if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, probe); + name_loc = xfs_attr_leaf_name_local(leaf, probe); if (name_loc->namelen != args->namelen) continue; if (memcmp(args->name, (char *)name_loc->nameval, args->namelen) != 0) @@ -1995,7 +1995,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args) args->index = probe; return(XFS_ERROR(EEXIST)); } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, probe); + name_rmt = xfs_attr_leaf_name_remote(leaf, probe); if (name_rmt->namelen != args->namelen) continue; if (memcmp(args->name, (char *)name_rmt->name, @@ -2035,7 +2035,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) entry = &leaf->entries[args->index]; if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); + name_loc = xfs_attr_leaf_name_local(leaf, args->index); ASSERT(name_loc->namelen == args->namelen); ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0); valuelen = be16_to_cpu(name_loc->valuelen); @@ -2050,7 +2050,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args) args->valuelen = valuelen; memcpy(args->value, &name_loc->nameval[args->namelen], valuelen); } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); ASSERT(name_rmt->namelen == args->namelen); ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); valuelen = be32_to_cpu(name_rmt->valuelen); @@ -2143,7 +2143,7 @@ 
xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, * off for 6.2, should be revisited later. */ if (entry_s->flags & XFS_ATTR_INCOMPLETE) { /* skip partials? */ - memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); + memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); be16_add_cpu(&hdr_s->usedbytes, -tmp); be16_add_cpu(&hdr_s->count, -1); entry_d--; /* to compensate for ++ in loop hdr */ @@ -2160,11 +2160,11 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s, entry_d->flags = entry_s->flags; ASSERT(be16_to_cpu(entry_d->nameidx) + tmp <= XFS_LBSIZE(mp)); - memmove(XFS_ATTR_LEAF_NAME(leaf_d, desti), - XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), tmp); + memmove(xfs_attr_leaf_name(leaf_d, desti), + xfs_attr_leaf_name(leaf_s, start_s + i), tmp); ASSERT(be16_to_cpu(entry_s->nameidx) + tmp <= XFS_LBSIZE(mp)); - memset(XFS_ATTR_LEAF_NAME(leaf_s, start_s + i), 0, tmp); + memset(xfs_attr_leaf_name(leaf_s, start_s + i), 0, tmp); be16_add_cpu(&hdr_s->usedbytes, -tmp); be16_add_cpu(&hdr_d->usedbytes, tmp); be16_add_cpu(&hdr_s->count, -1); @@ -2276,12 +2276,12 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index) ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, index); - size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(name_loc->namelen, + name_loc = xfs_attr_leaf_name_local(leaf, index); + size = xfs_attr_leaf_entsize_local(name_loc->namelen, be16_to_cpu(name_loc->valuelen)); } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, index); - size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(name_rmt->namelen); + name_rmt = xfs_attr_leaf_name_remote(leaf, index); + size = xfs_attr_leaf_entsize_remote(name_rmt->namelen); } return(size); } @@ -2297,13 +2297,13 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local) { int size; - size = XFS_ATTR_LEAF_ENTSIZE_LOCAL(namelen, valuelen); - if (size < 
XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(blocksize)) { + size = xfs_attr_leaf_entsize_local(namelen, valuelen); + if (size < xfs_attr_leaf_entsize_local_max(blocksize)) { if (local) { *local = 1; } } else { - size = XFS_ATTR_LEAF_ENTSIZE_REMOTE(namelen); + size = xfs_attr_leaf_entsize_remote(namelen); if (local) { *local = 0; } @@ -2372,7 +2372,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) if (entry->flags & XFS_ATTR_LOCAL) { xfs_attr_leaf_name_local_t *name_loc = - XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); + xfs_attr_leaf_name_local(leaf, i); retval = context->put_listent(context, entry->flags, @@ -2384,7 +2384,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) return retval; } else { xfs_attr_leaf_name_remote_t *name_rmt = - XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); + xfs_attr_leaf_name_remote(leaf, i); int valuelen = be32_to_cpu(name_rmt->valuelen); @@ -2468,11 +2468,11 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) #ifdef DEBUG if (entry->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, args->index); + name_loc = xfs_attr_leaf_name_local(leaf, args->index); namelen = name_loc->namelen; name = (char *)name_loc->nameval; } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); namelen = name_rmt->namelen; name = (char *)name_rmt->name; } @@ -2487,7 +2487,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) if (args->rmtblkno) { ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0); - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); xfs_da_log_buf(args->trans, bp, @@ -2534,7 +2534,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) xfs_da_log_buf(args->trans, bp, XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry))); if ((entry->flags & XFS_ATTR_LOCAL) == 0) { - name_rmt = 
XFS_ATTR_LEAF_NAME_REMOTE(leaf, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf, args->index); name_rmt->valueblk = 0; name_rmt->valuelen = 0; xfs_da_log_buf(args->trans, bp, @@ -2607,20 +2607,20 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) #ifdef DEBUG if (entry1->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf1, args->index); + name_loc = xfs_attr_leaf_name_local(leaf1, args->index); namelen1 = name_loc->namelen; name1 = (char *)name_loc->nameval; } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf1, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); namelen1 = name_rmt->namelen; name1 = (char *)name_rmt->name; } if (entry2->flags & XFS_ATTR_LOCAL) { - name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf2, args->index2); + name_loc = xfs_attr_leaf_name_local(leaf2, args->index2); namelen2 = name_loc->namelen; name2 = (char *)name_loc->nameval; } else { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf2, args->index2); + name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); namelen2 = name_rmt->namelen; name2 = (char *)name_rmt->name; } @@ -2637,7 +2637,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1))); if (args->rmtblkno) { ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0); - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf1, args->index); + name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index); name_rmt->valueblk = cpu_to_be32(args->rmtblkno); name_rmt->valuelen = cpu_to_be32(args->valuelen); xfs_da_log_buf(args->trans, bp1, @@ -2648,7 +2648,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) xfs_da_log_buf(args->trans, bp2, XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2))); if ((entry2->flags & XFS_ATTR_LOCAL) == 0) { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf2, args->index2); + name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2); name_rmt->valueblk = 0; name_rmt->valuelen = 0; xfs_da_log_buf(args->trans, bp2, @@ -2855,7 +2855,7 @@ xfs_attr_leaf_inactive(xfs_trans_t 
**trans, xfs_inode_t *dp, xfs_dabuf_t *bp) for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (be16_to_cpu(entry->nameidx) && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); + name_rmt = xfs_attr_leaf_name_remote(leaf, i); if (name_rmt->valueblk) count++; } @@ -2883,7 +2883,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (be16_to_cpu(entry->nameidx) && ((entry->flags & XFS_ATTR_LOCAL) == 0)) { - name_rmt = XFS_ATTR_LEAF_NAME_REMOTE(leaf, i); + name_rmt = xfs_attr_leaf_name_remote(leaf, i); if (name_rmt->valueblk) { lp->valueblk = be32_to_cpu(name_rmt->valueblk); lp->valuelen = XFS_B_TO_FSB(dp->i_mount, diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 83e9af417ca..9c7d22fdcf4 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -151,8 +151,6 @@ typedef struct xfs_attr_leafblock { /* * Cast typed pointers for "local" and "remote" name/value structs. 
*/ -#define XFS_ATTR_LEAF_NAME_REMOTE(leafp,idx) \ - xfs_attr_leaf_name_remote(leafp,idx) static inline xfs_attr_leaf_name_remote_t * xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) { @@ -160,8 +158,6 @@ xfs_attr_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; } -#define XFS_ATTR_LEAF_NAME_LOCAL(leafp,idx) \ - xfs_attr_leaf_name_local(leafp,idx) static inline xfs_attr_leaf_name_local_t * xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) { @@ -169,8 +165,6 @@ xfs_attr_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; } -#define XFS_ATTR_LEAF_NAME(leafp,idx) \ - xfs_attr_leaf_name(leafp,idx) static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) { return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)]; @@ -181,24 +175,18 @@ static inline char *xfs_attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx) * a "local" name/value structure, a "remote" name/value structure, and * a pointer which might be either. 
*/ -#define XFS_ATTR_LEAF_ENTSIZE_REMOTE(nlen) \ - xfs_attr_leaf_entsize_remote(nlen) static inline int xfs_attr_leaf_entsize_remote(int nlen) { return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); } -#define XFS_ATTR_LEAF_ENTSIZE_LOCAL(nlen,vlen) \ - xfs_attr_leaf_entsize_local(nlen,vlen) static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) { return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); } -#define XFS_ATTR_LEAF_ENTSIZE_LOCAL_MAX(bsize) \ - xfs_attr_leaf_entsize_local_max(bsize) static inline int xfs_attr_leaf_entsize_local_max(int bsize) { return (((bsize) >> 1) + ((bsize) >> 2)); -- cgit From fb82557f16f3700ae4961a4ce599bdaff6a10b1c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Fri, 9 Jan 2009 15:53:54 +1100 Subject: [XFS] Remove macro-to-function indirections in the mask code Signed-off-by: Eric Sandeen Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_ag.h | 2 +- fs/xfs/xfs_bit.h | 10 +----- fs/xfs/xfs_bmap_btree.c | 84 ++++++++++++++++++++++++------------------------- 3 files changed, 44 insertions(+), 52 deletions(-) diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index f2e21817a22..d3b3cf74299 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -231,7 +231,7 @@ typedef struct xfs_perag #define XFS_FSB_TO_AGNO(mp,fsbno) \ ((xfs_agnumber_t)((fsbno) >> (mp)->m_sb.sb_agblklog)) #define XFS_FSB_TO_AGBNO(mp,fsbno) \ - ((xfs_agblock_t)((fsbno) & XFS_MASK32LO((mp)->m_sb.sb_agblklog))) + ((xfs_agblock_t)((fsbno) & xfs_mask32lo((mp)->m_sb.sb_agblklog))) #define XFS_AGB_TO_DADDR(mp,agno,agbno) \ ((xfs_daddr_t)XFS_FSB_TO_BB(mp, \ (xfs_fsblock_t)(agno) * (mp)->m_sb.sb_agblocks + (agbno))) diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index bca7b243c31..f1e3c907044 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -23,24 +23,16 @@ */ /* - * masks 
with n high/low bits set, 32-bit values & 64-bit values + * masks with n high/low bits set, 64-bit values */ -#define XFS_MASK32HI(n) xfs_mask32hi(n) -static inline __uint32_t xfs_mask32hi(int n) -{ - return (__uint32_t)-1 << (32 - (n)); -} -#define XFS_MASK64HI(n) xfs_mask64hi(n) static inline __uint64_t xfs_mask64hi(int n) { return (__uint64_t)-1 << (64 - (n)); } -#define XFS_MASK32LO(n) xfs_mask32lo(n) static inline __uint32_t xfs_mask32lo(int n) { return ((__uint32_t)1 << (n)) - 1; } -#define XFS_MASK64LO(n) xfs_mask64lo(n) static inline __uint64_t xfs_mask64lo(int n) { return ((__uint64_t)1 << (n)) - 1; diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 8f1ec73725d..ba6b08c2fb0 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -110,16 +110,16 @@ __xfs_bmbt_get_all( ext_flag = (int)(l0 >> (64 - BMBT_EXNTFLAG_BITLEN)); s->br_startoff = ((xfs_fileoff_t)l0 & - XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; #if XFS_BIG_BLKNOS - s->br_startblock = (((xfs_fsblock_t)l0 & XFS_MASK64LO(9)) << 43) | + s->br_startblock = (((xfs_fsblock_t)l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)l1) >> 21); #else #ifdef DEBUG { xfs_dfsbno_t b; - b = (((xfs_dfsbno_t)l0 & XFS_MASK64LO(9)) << 43) | + b = (((xfs_dfsbno_t)l0 & xfs_mask64lo(9)) << 43) | (((xfs_dfsbno_t)l1) >> 21); ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b)); s->br_startblock = (xfs_fsblock_t)b; @@ -128,7 +128,7 @@ __xfs_bmbt_get_all( s->br_startblock = (xfs_fsblock_t)(((xfs_dfsbno_t)l1) >> 21); #endif /* DEBUG */ #endif /* XFS_BIG_BLKNOS */ - s->br_blockcount = (xfs_filblks_t)(l1 & XFS_MASK64LO(21)); + s->br_blockcount = (xfs_filblks_t)(l1 & xfs_mask64lo(21)); /* This is xfs_extent_state() in-line */ if (ext_flag) { ASSERT(s->br_blockcount != 0); /* saved for DMIG */ @@ -153,7 +153,7 @@ xfs_filblks_t xfs_bmbt_get_blockcount( xfs_bmbt_rec_host_t *r) { - return (xfs_filblks_t)(r->l1 & XFS_MASK64LO(21)); + return (xfs_filblks_t)(r->l1 & 
xfs_mask64lo(21)); } /* @@ -164,13 +164,13 @@ xfs_bmbt_get_startblock( xfs_bmbt_rec_host_t *r) { #if XFS_BIG_BLKNOS - return (((xfs_fsblock_t)r->l0 & XFS_MASK64LO(9)) << 43) | + return (((xfs_fsblock_t)r->l0 & xfs_mask64lo(9)) << 43) | (((xfs_fsblock_t)r->l1) >> 21); #else #ifdef DEBUG xfs_dfsbno_t b; - b = (((xfs_dfsbno_t)r->l0 & XFS_MASK64LO(9)) << 43) | + b = (((xfs_dfsbno_t)r->l0 & xfs_mask64lo(9)) << 43) | (((xfs_dfsbno_t)r->l1) >> 21); ASSERT((b >> 32) == 0 || ISNULLDSTARTBLOCK(b)); return (xfs_fsblock_t)b; @@ -188,7 +188,7 @@ xfs_bmbt_get_startoff( xfs_bmbt_rec_host_t *r) { return ((xfs_fileoff_t)r->l0 & - XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } xfs_exntst_t @@ -219,7 +219,7 @@ xfs_filblks_t xfs_bmbt_disk_get_blockcount( xfs_bmbt_rec_t *r) { - return (xfs_filblks_t)(be64_to_cpu(r->l1) & XFS_MASK64LO(21)); + return (xfs_filblks_t)(be64_to_cpu(r->l1) & xfs_mask64lo(21)); } /* @@ -230,7 +230,7 @@ xfs_bmbt_disk_get_startoff( xfs_bmbt_rec_t *r) { return ((xfs_fileoff_t)be64_to_cpu(r->l0) & - XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; + xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN)) >> 9; } @@ -248,33 +248,33 @@ xfs_bmbt_set_allf( int extent_flag = (state == XFS_EXT_NORM) ? 
0 : 1; ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); + ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); + ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); #if XFS_BIG_BLKNOS - ASSERT((startblock & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0); + ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | ((xfs_bmbt_rec_base_t)startoff << 9) | ((xfs_bmbt_rec_base_t)startblock >> 43); r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); #else /* !XFS_BIG_BLKNOS */ if (ISNULLSTARTBLOCK(startblock)) { r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)XFS_MASK64LO(9); - r->l1 = XFS_MASK64HI(11) | + (xfs_bmbt_rec_base_t)xfs_mask64lo(9); + r->l1 = xfs_mask64hi(11) | ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); } else { r->l0 = ((xfs_bmbt_rec_base_t)extent_flag << 63) | ((xfs_bmbt_rec_base_t)startoff << 9); r->l1 = ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); } #endif /* XFS_BIG_BLKNOS */ } @@ -306,11 +306,11 @@ xfs_bmbt_disk_set_allf( int extent_flag = (state == XFS_EXT_NORM) ? 
0 : 1; ASSERT(state == XFS_EXT_NORM || state == XFS_EXT_UNWRITTEN); - ASSERT((startoff & XFS_MASK64HI(64-BMBT_STARTOFF_BITLEN)) == 0); - ASSERT((blockcount & XFS_MASK64HI(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); + ASSERT((startoff & xfs_mask64hi(64-BMBT_STARTOFF_BITLEN)) == 0); + ASSERT((blockcount & xfs_mask64hi(64-BMBT_BLOCKCOUNT_BITLEN)) == 0); #if XFS_BIG_BLKNOS - ASSERT((startblock & XFS_MASK64HI(64-BMBT_STARTBLOCK_BITLEN)) == 0); + ASSERT((startblock & xfs_mask64hi(64-BMBT_STARTBLOCK_BITLEN)) == 0); r->l0 = cpu_to_be64( ((xfs_bmbt_rec_base_t)extent_flag << 63) | @@ -319,17 +319,17 @@ xfs_bmbt_disk_set_allf( r->l1 = cpu_to_be64( ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21))); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); #else /* !XFS_BIG_BLKNOS */ if (ISNULLSTARTBLOCK(startblock)) { r->l0 = cpu_to_be64( ((xfs_bmbt_rec_base_t)extent_flag << 63) | ((xfs_bmbt_rec_base_t)startoff << 9) | - (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)); - r->l1 = cpu_to_be64(XFS_MASK64HI(11) | + (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); + r->l1 = cpu_to_be64(xfs_mask64hi(11) | ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21))); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); } else { r->l0 = cpu_to_be64( ((xfs_bmbt_rec_base_t)extent_flag << 63) | @@ -337,7 +337,7 @@ xfs_bmbt_disk_set_allf( r->l1 = cpu_to_be64( ((xfs_bmbt_rec_base_t)startblock << 21) | ((xfs_bmbt_rec_base_t)blockcount & - (xfs_bmbt_rec_base_t)XFS_MASK64LO(21))); + (xfs_bmbt_rec_base_t)xfs_mask64lo(21))); } #endif /* XFS_BIG_BLKNOS */ } @@ -362,9 +362,9 @@ xfs_bmbt_set_blockcount( xfs_bmbt_rec_host_t *r, xfs_filblks_t v) { - ASSERT((v & XFS_MASK64HI(43)) == 0); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(43)) | - (xfs_bmbt_rec_base_t)(v & XFS_MASK64LO(21)); + ASSERT((v & xfs_mask64hi(43)) == 0); + r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64hi(43)) | + (xfs_bmbt_rec_base_t)(v & 
xfs_mask64lo(21)); } /* @@ -376,21 +376,21 @@ xfs_bmbt_set_startblock( xfs_fsblock_t v) { #if XFS_BIG_BLKNOS - ASSERT((v & XFS_MASK64HI(12)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64HI(55)) | + ASSERT((v & xfs_mask64hi(12)) == 0); + r->l0 = (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64hi(55)) | (xfs_bmbt_rec_base_t)(v >> 43); - r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)) | + r->l1 = (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)) | (xfs_bmbt_rec_base_t)(v << 21); #else /* !XFS_BIG_BLKNOS */ if (ISNULLSTARTBLOCK(v)) { - r->l0 |= (xfs_bmbt_rec_base_t)XFS_MASK64LO(9); - r->l1 = (xfs_bmbt_rec_base_t)XFS_MASK64HI(11) | + r->l0 |= (xfs_bmbt_rec_base_t)xfs_mask64lo(9); + r->l1 = (xfs_bmbt_rec_base_t)xfs_mask64hi(11) | ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)); + (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); } else { - r->l0 &= ~(xfs_bmbt_rec_base_t)XFS_MASK64LO(9); + r->l0 &= ~(xfs_bmbt_rec_base_t)xfs_mask64lo(9); r->l1 = ((xfs_bmbt_rec_base_t)v << 21) | - (r->l1 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(21)); + (r->l1 & (xfs_bmbt_rec_base_t)xfs_mask64lo(21)); } #endif /* XFS_BIG_BLKNOS */ } @@ -403,10 +403,10 @@ xfs_bmbt_set_startoff( xfs_bmbt_rec_host_t *r, xfs_fileoff_t v) { - ASSERT((v & XFS_MASK64HI(9)) == 0); - r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) XFS_MASK64HI(1)) | + ASSERT((v & xfs_mask64hi(9)) == 0); + r->l0 = (r->l0 & (xfs_bmbt_rec_base_t) xfs_mask64hi(1)) | ((xfs_bmbt_rec_base_t)v << 9) | - (r->l0 & (xfs_bmbt_rec_base_t)XFS_MASK64LO(9)); + (r->l0 & (xfs_bmbt_rec_base_t)xfs_mask64lo(9)); } /* @@ -419,9 +419,9 @@ xfs_bmbt_set_state( { ASSERT(v == XFS_EXT_NORM || v == XFS_EXT_UNWRITTEN); if (v == XFS_EXT_NORM) - r->l0 &= XFS_MASK64LO(64 - BMBT_EXNTFLAG_BITLEN); + r->l0 &= xfs_mask64lo(64 - BMBT_EXNTFLAG_BITLEN); else - r->l0 |= XFS_MASK64HI(BMBT_EXNTFLAG_BITLEN); + r->l0 |= xfs_mask64hi(BMBT_EXNTFLAG_BITLEN); } /* -- cgit From e6edbd1c1cbef278d58cdd8b046599ba8ac90cfc Mon Sep 17 00:00:00 2001 From: 
Christoph Hellwig Date: Thu, 8 Jan 2009 13:42:23 -0500 Subject: [XFS] fix compile of xfs_btree_readahead_lblock on m68k Change the left/right variables to the proper always 64bit xfs_dfsbno_t type because otherwise compilation fails for Geert on m68k without CONFIG_LBD: | fs/xfs/xfs_btree.c: In function 'xfs_btree_readahead_lblock': | fs/xfs/xfs_btree.c:736: warning: comparison is always true due to limited range of data type | fs/xfs/xfs_btree.c:741: warning: comparison is always true due to limited range of data type Reported-by: Geert Uytterhoeven Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_btree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 7ed59267420..2c3ef20f884 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -730,8 +730,8 @@ xfs_btree_readahead_lblock( struct xfs_btree_block *block) { int rval = 0; - xfs_fsblock_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); - xfs_fsblock_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); + xfs_dfsbno_t left = be64_to_cpu(block->bb_u.l.bb_leftsib); + xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { xfs_btree_reada_bufl(cur->bc_mp, left, 1); -- cgit From 15440319767942a363f282d6585303d3d75088ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jan 2009 14:00:00 -0500 Subject: [XFS] truncate readdir offsets to signed 32 bit values John Stanley reported EOVERFLOW errors in readdir from his self-built glibc. I traced this down to glibc enabling d_off overflow checks in one of the about five million different getdents implementations. In 2.6.28 Dave Woodhouse moved our readdir double buffering required for NFS4 readdirplus into nfsd and at that point we lost the capping of the directory offsets to 32 bit signed values. 
Johns glibc used getdents64 to even implement readdir for normal 32 bit offset dirents, and failed with EOVERFLOW only if this happens on the first dirent in a getdents call. I managed to come up with a testcase that uses raw getdents and does the EOVERFLOW check manually. We always hit it with our last entry due to the special end of directory marker. The patch below is a dumb version of just putting back the masking, to make sure we have the same behavior as in 2.6.27 and earlier. I will work on a better and cleaner fix for 2.6.30. Reported-by: John Stanley Tested-by: John Stanley Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_dir2_block.c | 7 ++++--- fs/xfs/xfs_dir2_leaf.c | 6 +++--- fs/xfs/xfs_dir2_sf.c | 15 ++++++++------- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e2fa0a1d8e9..e1f0a06aaf0 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -517,9 +517,9 @@ xfs_dir2_block_getdents( /* * If it didn't fit, set the final offset to here & return. */ - if (filldir(dirent, dep->name, dep->namelen, cook, + if (filldir(dirent, dep->name, dep->namelen, cook & 0x7fffffff, ino, DT_UNKNOWN)) { - *offset = cook; + *offset = cook & 0x7fffffff; xfs_da_brelse(NULL, bp); return 0; } @@ -529,7 +529,8 @@ xfs_dir2_block_getdents( * Reached the end of the block. * Set the offset to a non-existent block 1 and return. */ - *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0); + *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & + 0x7fffffff; xfs_da_brelse(NULL, bp); return 0; } diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 93535992cb6..ef805a374ee 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -1092,7 +1092,7 @@ xfs_dir2_leaf_getdents( * Won't fit. Return to caller. 
*/ if (filldir(dirent, dep->name, dep->namelen, - xfs_dir2_byte_to_dataptr(mp, curoff), + xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, ino, DT_UNKNOWN)) break; @@ -1108,9 +1108,9 @@ xfs_dir2_leaf_getdents( * All done. Set output offset value to current offset. */ if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) - *offset = XFS_DIR2_MAX_DATAPTR; + *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; else - *offset = xfs_dir2_byte_to_dataptr(mp, curoff); + *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; kmem_free(map); if (bp) xfs_da_brelse(NULL, bp); diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index b46af0013ec..a8a8a6efad5 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -752,8 +752,8 @@ xfs_dir2_sf_getdents( #if XFS_BIG_INUMS ino += mp->m_inoadd; #endif - if (filldir(dirent, ".", 1, dot_offset, ino, DT_DIR)) { - *offset = dot_offset; + if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, ino, DT_DIR)) { + *offset = dot_offset & 0x7fffffff; return 0; } } @@ -766,8 +766,8 @@ xfs_dir2_sf_getdents( #if XFS_BIG_INUMS ino += mp->m_inoadd; #endif - if (filldir(dirent, "..", 2, dotdot_offset, ino, DT_DIR)) { - *offset = dotdot_offset; + if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { + *offset = dotdot_offset & 0x7fffffff; return 0; } } @@ -791,14 +791,15 @@ xfs_dir2_sf_getdents( #endif if (filldir(dirent, sfep->name, sfep->namelen, - off, ino, DT_UNKNOWN)) { - *offset = off; + off & 0x7fffffff, ino, DT_UNKNOWN)) { + *offset = off & 0x7fffffff; return 0; } sfep = xfs_dir2_sf_nextentry(sfp, sfep); } - *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0); + *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & + 0x7fffffff; return 0; } -- cgit From 058652a37dd9eac18d6b8c1a311137c679de9dae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 8 Jan 2009 13:42:25 -0500 Subject: [XFS] make xfs_ino_t an unsigned long long Currently xfs_ino_t is defined as a u64 which 
can either be an unsigned long long or on some 64 bit platforms an unsigned long. Just making it an unsigned long long means it's still always 64 bits wide, but we don't need to resort to casts to print it. Fixes a warning regression on 64 bit powerpc in current git. Signed-off-by: Christoph Hellwig Reviewed-by: Dave Chinner Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index baedbd14dc2..b2f724502f1 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h @@ -45,7 +45,7 @@ typedef __uint32_t prid_t; /* project ID */ typedef __uint32_t inst_t; /* an instruction */ typedef __s64 xfs_off_t; /* type */ -typedef __u64 xfs_ino_t; /* type */ +typedef unsigned long long xfs_ino_t; /* type */ typedef __s64 xfs_daddr_t; /* type */ typedef char * xfs_caddr_t; /* type */ typedef __u32 xfs_dev_t; -- cgit From 958f8c0e4fc311e23a40635a530c01aec366a6e8 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:40:44 +1100 Subject: [XFS] remove old vmap cache XFS's vmap batching simply defers a number (up to 64) of vunmaps, and keeps track of them in a list. To purge the batch, it just goes through the list and calls vunmap on each one. This is pretty poor: a global TLB flush is generally still performed on each vunmap, with the most expensive parts of the operation being the broadcast IPIs and locking involved in the SMP callouts, and the locking involved in the vmap management -- none of these are avoided by just batching up the calls. I'm actually surprised it ever made much difference. (Now that the lazy vmap allocator is upstream, this description is not quite right, but the vunmap batching still doesn't seem to do much) Rip all this logic out of XFS completely. I will improve vmap performance and scalability directly in a subsequent patch.
Signed-off-by: Nick Piggin Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 75 +--------------------------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index cb329edc925..0b2177a9fbd 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -165,75 +165,6 @@ test_page_region( return (mask && (page_private(page) & mask) == mask); } -/* - * Mapping of multi-page buffers into contiguous virtual space - */ - -typedef struct a_list { - void *vm_addr; - struct a_list *next; -} a_list_t; - -static a_list_t *as_free_head; -static int as_list_len; -static DEFINE_SPINLOCK(as_lock); - -/* - * Try to batch vunmaps because they are costly. - */ -STATIC void -free_address( - void *addr) -{ - a_list_t *aentry; - -#ifdef CONFIG_XEN - /* - * Xen needs to be able to make sure it can get an exclusive - * RO mapping of pages it wants to turn into a pagetable. If - * a newly allocated page is also still being vmap()ed by xfs, - * it will cause pagetable construction to fail. This is a - * quick workaround to always eagerly unmap pages so that Xen - * is happy. 
- */ - vunmap(addr); - return; -#endif - - aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); - if (likely(aentry)) { - spin_lock(&as_lock); - aentry->next = as_free_head; - aentry->vm_addr = addr; - as_free_head = aentry; - as_list_len++; - spin_unlock(&as_lock); - } else { - vunmap(addr); - } -} - -STATIC void -purge_addresses(void) -{ - a_list_t *aentry, *old; - - if (as_free_head == NULL) - return; - - spin_lock(&as_lock); - aentry = as_free_head; - as_free_head = NULL; - as_list_len = 0; - spin_unlock(&as_lock); - - while ((old = aentry) != NULL) { - vunmap(aentry->vm_addr); - aentry = aentry->next; - kfree(old); - } -} - /* * Internal xfs_buf_t object manipulation */ @@ -333,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - free_address(bp->b_addr - bp->b_offset); + vunmap(bp->b_addr - bp->b_offset); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -455,8 +386,6 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - if (as_list_len > 64) - purge_addresses(); bp->b_addr = vmap(bp->b_pages, bp->b_page_count, VM_MAP, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) @@ -1743,8 +1672,6 @@ xfsbufd( count++; } - if (as_list_len > 0) - purge_addresses(); if (count) blk_run_address_space(target->bt_mapping); -- cgit From 0087167c9d5b1273e7e6bbe39a9ab13bdb9a39bb Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:43:09 +1100 Subject: [XFS] use scalable vmap API Implement XFS's large buffer support with the new vmap APIs. See the vmap rewrite (db64fe02) for some numbers. The biggest improvement that comes from using the new APIs is avoiding the global KVA allocation lock on every call. 
Signed-off-by: Nick Piggin Reviewed-by: Christoph Hellwig Signed-off-by: Lachlan McIlroy --- fs/xfs/linux-2.6/xfs_buf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 0b2177a9fbd..d71dc44e21e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -264,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - vunmap(bp->b_addr - bp->b_offset); + vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -386,8 +386,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - bp->b_addr = vmap(bp->b_pages, bp->b_page_count, - VM_MAP, PAGE_KERNEL); + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) return -ENOMEM; bp->b_addr += bp->b_offset; -- cgit From 0335cb76aa3fa913a2164bc9b669e5aef9d56fa3 Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Wed, 31 Dec 2008 12:10:12 +1100 Subject: [XFS] Update maintainers New maintainer contact and new tree location. Reviewed-by: Bill O`Donnell Signed-off-by: Lachlan McIlroy --- MAINTAINERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index a01884407fe..5f74bce1854 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4824,11 +4824,11 @@ S: Supported XFS FILESYSTEM P: Silicon Graphics Inc -P: Tim Shimmin +P: Bill O'Donnell M: xfs-masters@oss.sgi.com L: xfs@oss.sgi.com W: http://oss.sgi.com/projects/xfs -T: git git://oss.sgi.com:8090/xfs/xfs-2.6.git +T: git://oss.sgi.com/xfs/xfs.git S: Supported XILINX SYSTEMACE DRIVER -- cgit From 23faf63123e306b4a134f6d6c501813f6c8599dc Mon Sep 17 00:00:00 2001 From: roel kluin Date: Fri, 9 Jan 2009 15:49:04 -0700 Subject: powerpc/mpc5121: fix NULL test in mpc5121_clk_get utility function. 
strcmp on NULL results in a segmentation fault, also, remove the second, redundant test on dev Signed-off-by: Roel Kluin Signed-off-by: Grant Likely --- arch/powerpc/platforms/512x/clock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/512x/clock.c b/arch/powerpc/platforms/512x/clock.c index f416014ee72..1bcff94eb92 100644 --- a/arch/powerpc/platforms/512x/clock.c +++ b/arch/powerpc/platforms/512x/clock.c @@ -56,12 +56,12 @@ static struct clk *mpc5121_clk_get(struct device *dev, const char *id) int dev_match = 0; int id_match = 0; - if (dev == NULL && id == NULL) + if (dev == NULL || id == NULL) return NULL; mutex_lock(&clocks_mutex); list_for_each_entry(p, &clocks, node) { - if (dev && dev == p->dev) + if (dev == p->dev) dev_match++; if (strcmp(id, p->name) == 0) id_match++; -- cgit From d30239a0ef2000cfae6c60f8e870da2d8dba4d22 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Fri, 9 Jan 2009 15:49:05 -0700 Subject: powerpc/mpc52xx: Properly update irq_desc when set_type() is called. The MPC5200 PIC driver doesn't correctly update the .status field of the irq_desc structure when the set_type hook is called. This patch adds the required code. Also cleans up the external IRQ typename field to be something easier to read (very minor). 
Signed-off-by: Grant Likely --- arch/powerpc/platforms/52xx/mpc52xx_pic.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_pic.c b/arch/powerpc/platforms/52xx/mpc52xx_pic.c index 72865e8e4b5..0a093f03c75 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_pic.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_pic.c @@ -196,6 +196,7 @@ static void mpc52xx_extirq_ack(unsigned int virq) static int mpc52xx_extirq_set_type(unsigned int virq, unsigned int flow_type) { + struct irq_desc *desc = get_irq_desc(virq); u32 ctrl_reg, type; int irq; int l2irq; @@ -222,6 +223,11 @@ static int mpc52xx_extirq_set_type(unsigned int virq, unsigned int flow_type) type = 0; } + desc->status &= ~(IRQ_TYPE_SENSE_MASK | IRQ_LEVEL); + desc->status |= flow_type & IRQ_TYPE_SENSE_MASK; + if (flow_type & (IRQ_TYPE_LEVEL_HIGH | IRQ_TYPE_LEVEL_LOW)) + desc->status |= IRQ_LEVEL; + ctrl_reg = in_be32(&intr->ctrl); ctrl_reg &= ~(0x3 << (22 - (l2irq * 2))); ctrl_reg |= (type << (22 - (l2irq * 2))); @@ -231,7 +237,7 @@ static int mpc52xx_extirq_set_type(unsigned int virq, unsigned int flow_type) } static struct irq_chip mpc52xx_extirq_irqchip = { - .typename = " MPC52xx IRQ[0-3] ", + .typename = "MPC52xx External", .mask = mpc52xx_extirq_mask, .unmask = mpc52xx_extirq_unmask, .ack = mpc52xx_extirq_ack, -- cgit From ddd527d56a0e635abdc7ffeb688cdde7593ad2d3 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 9 Jan 2009 15:49:05 -0700 Subject: powerpc/mpc52xx: remove dead code from GPIO driver Eliminate duplicate return statements Signed-off-by: Wolfram Sang Signed-off-by: Grant Likely --- arch/powerpc/platforms/52xx/mpc52xx_gpio.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c index 8a455ebce98..07f89ae46d0 100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpio.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpio.c @@ -363,11 +363,8 @@ static int 
mpc52xx_gpt_gpio_get(struct gpio_chip *gc, unsigned int gpio) { struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc); struct mpc52xx_gpt __iomem *regs = mm_gc->regs; - unsigned int ret; return (in_be32(&regs->status) & (1 << (31 - 23))) ? 1 : 0; - - return ret; } static void -- cgit From f5020384e4fa8ab9397aa6fa176e61e9bf7947f7 Mon Sep 17 00:00:00 2001 From: Yuri Tikhonov Date: Fri, 9 Jan 2009 15:49:06 -0700 Subject: powerpc/xsysace: add compatible string for non-ipcore instance Add "xlnx,sysace" compatible string to the of_platform binding table. Platforms which have the SysACE chip on board (e.g. Katmai) instead of via a Xilinx generated IP core will use this value in their device tree. Signed-off-by: Yuri Tikhonov Signed-off-by: Grant Likely --- drivers/block/xsysace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 29e1dfafb7c..381d686fc1a 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c @@ -1206,6 +1206,7 @@ static struct of_device_id ace_of_match[] __devinitdata = { { .compatible = "xlnx,opb-sysace-1.00.b", }, { .compatible = "xlnx,opb-sysace-1.00.c", }, { .compatible = "xlnx,xps-sysace-1.00.a", }, + { .compatible = "xlnx,sysace", }, {}, }; MODULE_DEVICE_TABLE(of, ace_of_match); -- cgit From 2526c151c31358aec66b63921dd712bbec5ee0cb Mon Sep 17 00:00:00 2001 From: Jon Smirl Date: Fri, 9 Jan 2009 15:49:06 -0700 Subject: drivers/of: Add the of_find_i2c_device_by_node function. The of_find_i2c_device_by_node function allows you to follow a reference in the device tree to an i2c device node and then locate the linux device instantiated by the device tree. Example use: an I2S bus driver finding the i2c_device instance for a codec described by a device tree node. This was waiting for Anton's i2c patches that were just added.
Signed-off-by: Jon Smirl Signed-off-by: Grant Likely --- drivers/of/of_i2c.c | 19 +++++++++++++++++++ include/linux/of_i2c.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/drivers/of/of_i2c.c b/drivers/of/of_i2c.c index e1b0ad6e918..fa65a2b2ae2 100644 --- a/drivers/of/of_i2c.c +++ b/drivers/of/of_i2c.c @@ -66,4 +66,23 @@ void of_register_i2c_devices(struct i2c_adapter *adap, } EXPORT_SYMBOL(of_register_i2c_devices); +static int of_dev_node_match(struct device *dev, void *data) +{ + return dev_archdata_get_node(&dev->archdata) == data; +} + +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node) +{ + struct device *dev; + + dev = bus_find_device(&i2c_bus_type, NULL, node, + of_dev_node_match); + if (!dev) + return NULL; + + return to_i2c_client(dev); +} +EXPORT_SYMBOL(of_find_i2c_device_by_node); + MODULE_LICENSE("GPL"); diff --git a/include/linux/of_i2c.h b/include/linux/of_i2c.h index bd2a870ec29..34974b5a76f 100644 --- a/include/linux/of_i2c.h +++ b/include/linux/of_i2c.h @@ -17,4 +17,7 @@ void of_register_i2c_devices(struct i2c_adapter *adap, struct device_node *adap_node); +/* must call put_device() when done with returned i2c_client device */ +struct i2c_client *of_find_i2c_device_by_node(struct device_node *node); + #endif /* __LINUX_OF_I2C_H */ -- cgit From ff82c587a9a4cb8796e7e04377155deba15ae18b Mon Sep 17 00:00:00 2001 From: John Linn Date: Fri, 9 Jan 2009 16:01:53 -0700 Subject: Xilinx: SPI: updated driver for device tree The driver was updated to use the device tree rather than the platform data. 
Signed-off-by: John Linn --- drivers/spi/xilinx_spi.c | 137 ++++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 56 deletions(-) diff --git a/drivers/spi/xilinx_spi.c b/drivers/spi/xilinx_spi.c index 68d6f4988fb..fe7e5f35e5d 100644 --- a/drivers/spi/xilinx_spi.c +++ b/drivers/spi/xilinx_spi.c @@ -15,12 +15,15 @@ #include #include #include + +#include +#include +#include + #include #include #include -#include - #define XILINX_SPI_NAME "xilinx_spi" /* Register definitions as per "OPB Serial Peripheral Interface (SPI) (v1.00e) @@ -144,23 +147,14 @@ static int xilinx_spi_setup_transfer(struct spi_device *spi, struct spi_transfer *t) { u8 bits_per_word; - u32 hz; - struct xilinx_spi *xspi = spi_master_get_devdata(spi->master); bits_per_word = (t) ? t->bits_per_word : spi->bits_per_word; - hz = (t) ? t->speed_hz : spi->max_speed_hz; if (bits_per_word != 8) { dev_err(&spi->dev, "%s, unsupported bits_per_word=%d\n", __func__, bits_per_word); return -EINVAL; } - if (hz && xspi->speed_hz > hz) { - dev_err(&spi->dev, "%s, unsupported clock rate %uHz\n", - __func__, hz); - return -EINVAL; - } - return 0; } @@ -304,32 +298,38 @@ static irqreturn_t xilinx_spi_irq(int irq, void *dev_id) return IRQ_HANDLED; } -static int __init xilinx_spi_probe(struct platform_device *dev) +static int __init xilinx_spi_of_probe(struct of_device *ofdev, + const struct of_device_id *match) { - int ret = 0; struct spi_master *master; struct xilinx_spi *xspi; - struct xspi_platform_data *pdata; - struct resource *r; + struct resource r_irq_struct; + struct resource r_mem_struct; + + struct resource *r_irq = &r_irq_struct; + struct resource *r_mem = &r_mem_struct; + int rc = 0; + const u32 *prop; + int len; /* Get resources(memory, IRQ) associated with the device */ - master = spi_alloc_master(&dev->dev, sizeof(struct xilinx_spi)); + master = spi_alloc_master(&ofdev->dev, sizeof(struct xilinx_spi)); if (master == NULL) { return -ENOMEM; } - platform_set_drvdata(dev, 
master); - pdata = dev->dev.platform_data; + dev_set_drvdata(&ofdev->dev, master); - if (pdata == NULL) { - ret = -ENODEV; + rc = of_address_to_resource(ofdev->node, 0, r_mem); + if (rc) { + dev_warn(&ofdev->dev, "invalid address\n"); goto put_master; } - r = platform_get_resource(dev, IORESOURCE_MEM, 0); - if (r == NULL) { - ret = -ENODEV; + rc = of_irq_to_resource(ofdev->node, 0, r_irq); + if (rc == NO_IRQ) { + dev_warn(&ofdev->dev, "no IRQ found\n"); goto put_master; } @@ -341,47 +341,57 @@ static int __init xilinx_spi_probe(struct platform_device *dev) xspi->bitbang.master->setup = xilinx_spi_setup; init_completion(&xspi->done); - if (!request_mem_region(r->start, - r->end - r->start + 1, XILINX_SPI_NAME)) { - ret = -ENXIO; + xspi->irq = r_irq->start; + + if (!request_mem_region(r_mem->start, + r_mem->end - r_mem->start + 1, XILINX_SPI_NAME)) { + rc = -ENXIO; + dev_warn(&ofdev->dev, "memory request failure\n"); goto put_master; } - xspi->regs = ioremap(r->start, r->end - r->start + 1); + xspi->regs = ioremap(r_mem->start, r_mem->end - r_mem->start + 1); if (xspi->regs == NULL) { - ret = -ENOMEM; + rc = -ENOMEM; + dev_warn(&ofdev->dev, "ioremap failure\n"); goto put_master; } + xspi->irq = r_irq->start; - ret = platform_get_irq(dev, 0); - if (ret < 0) { - ret = -ENXIO; - goto unmap_io; - } - xspi->irq = ret; + /* dynamic bus assignment */ + master->bus_num = -1; - master->bus_num = pdata->bus_num; - master->num_chipselect = pdata->num_chipselect; - xspi->speed_hz = pdata->speed_hz; + /* number of slave select bits is required */ + prop = of_get_property(ofdev->node, "xlnx,num-ss-bits", &len); + if (!prop || len < sizeof(*prop)) { + dev_warn(&ofdev->dev, "no 'xlnx,num-ss-bits' property\n"); + goto put_master; + } + master->num_chipselect = *prop; /* SPI controller initializations */ xspi_init_hw(xspi->regs); /* Register for SPI Interrupt */ - ret = request_irq(xspi->irq, xilinx_spi_irq, 0, XILINX_SPI_NAME, xspi); - if (ret != 0) + rc = request_irq(xspi->irq, 
xilinx_spi_irq, 0, XILINX_SPI_NAME, xspi); + if (rc != 0) { + dev_warn(&ofdev->dev, "irq request failure: %d\n", xspi->irq); goto unmap_io; + } - ret = spi_bitbang_start(&xspi->bitbang); - if (ret != 0) { - dev_err(&dev->dev, "spi_bitbang_start FAILED\n"); + rc = spi_bitbang_start(&xspi->bitbang); + if (rc != 0) { + dev_err(&ofdev->dev, "spi_bitbang_start FAILED\n"); goto free_irq; } - dev_info(&dev->dev, "at 0x%08X mapped to 0x%08X, irq=%d\n", - r->start, (u32)xspi->regs, xspi->irq); + dev_info(&ofdev->dev, "at 0x%08X mapped to 0x%08X, irq=%d\n", + (unsigned int)r_mem->start, (u32)xspi->regs, xspi->irq); - return ret; + /* Add any subnodes on the SPI bus */ + of_register_spi_devices(master, ofdev->node); + + return rc; free_irq: free_irq(xspi->irq, xspi); @@ -389,21 +399,21 @@ unmap_io: iounmap(xspi->regs); put_master: spi_master_put(master); - return ret; + return rc; } -static int __devexit xilinx_spi_remove(struct platform_device *dev) +static int __devexit xilinx_spi_remove(struct of_device *ofdev) { struct xilinx_spi *xspi; struct spi_master *master; - master = platform_get_drvdata(dev); + master = platform_get_drvdata(ofdev); xspi = spi_master_get_devdata(master); spi_bitbang_stop(&xspi->bitbang); free_irq(xspi->irq, xspi); iounmap(xspi->regs); - platform_set_drvdata(dev, 0); + dev_set_drvdata(&ofdev->dev, 0); spi_master_put(xspi->bitbang.master); return 0; @@ -412,27 +422,42 @@ static int __devexit xilinx_spi_remove(struct platform_device *dev) /* work with hotplug and coldplug */ MODULE_ALIAS("platform:" XILINX_SPI_NAME); -static struct platform_driver xilinx_spi_driver = { - .probe = xilinx_spi_probe, - .remove = __devexit_p(xilinx_spi_remove), +static int __exit xilinx_spi_of_remove(struct of_device *op) +{ + return xilinx_spi_remove(op); +} + +static struct of_device_id xilinx_spi_of_match[] = { + { .compatible = "xlnx,xps-spi-2.00.a", }, + { .compatible = "xlnx,xps-spi-2.00.b", }, + {} +}; + +MODULE_DEVICE_TABLE(of, xilinx_spi_of_match); + +static 
struct of_platform_driver xilinx_spi_of_driver = { + .owner = THIS_MODULE, + .name = "xilinx-xps-spi", + .match_table = xilinx_spi_of_match, + .probe = xilinx_spi_of_probe, + .remove = __exit_p(xilinx_spi_of_remove), .driver = { - .name = XILINX_SPI_NAME, + .name = "xilinx-xps-spi", .owner = THIS_MODULE, }, }; static int __init xilinx_spi_init(void) { - return platform_driver_register(&xilinx_spi_driver); + return of_register_platform_driver(&xilinx_spi_of_driver); } module_init(xilinx_spi_init); static void __exit xilinx_spi_exit(void) { - platform_driver_unregister(&xilinx_spi_driver); + of_unregister_platform_driver(&xilinx_spi_of_driver); } module_exit(xilinx_spi_exit); - MODULE_AUTHOR("MontaVista Software, Inc. "); MODULE_DESCRIPTION("Xilinx SPI driver"); MODULE_LICENSE("GPL"); -- cgit From 71e0ffa599f54058d9b8724b4b14d0486751681d Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 8 Jan 2009 10:42:15 -0800 Subject: drm/i915: don't enable vblanks on disabled pipes In some cases userland may be confused and try to wait on vblank events from pipes that aren't actually enabled. We shouldn't allow this, so return -EINVAL if the pipe isn't on. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_irq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 0cadafbef41..6290219de6c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -411,6 +411,12 @@ int i915_enable_vblank(struct drm_device *dev, int pipe) { drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; unsigned long irqflags; + int pipeconf_reg = (pipe == 0) ? 
PIPEACONF : PIPEBCONF; + u32 pipeconf; + + pipeconf = I915_READ(pipeconf_reg); + if (!(pipeconf & PIPEACONF_ENABLE)) + return -EINVAL; spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); if (IS_I965G(dev)) -- cgit From dc1336ff4fe08ae7cfe8301bfd7f0b2cfd31d20a Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 6 Jan 2009 10:21:24 -0800 Subject: drm/i915: set vblank enabled flag correctly across IRQ install/uninstall In the absence of kernel mode setting, many drivers disable IRQs across VT switch. The core DRM vblank code is missing a check for this case however; even after IRQ disable, the vblank code will still have the vblank_enabled flag set, so unless we track the fact that they're disabled at IRQ uninstall time, when we VT switch back in we won't actually re-enable them, which means any apps waiting on vblank before the switch will hang. This patch does that and also adds a sanity check to the wait condition to look for the irq_enabled flag in general, as well as adding a wakeup to the IRQ uninstall path. Fixes fdo bug #18879 with compiz hangs at VT switch. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_irq.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c index 724e505873c..477caa1b1e4 100644 --- a/drivers/gpu/drm/drm_irq.c +++ b/drivers/gpu/drm/drm_irq.c @@ -267,7 +267,8 @@ EXPORT_SYMBOL(drm_irq_install); */ int drm_irq_uninstall(struct drm_device * dev) { - int irq_enabled; + unsigned long irqflags; + int irq_enabled, i; if (!drm_core_check_feature(dev, DRIVER_HAVE_IRQ)) return -EINVAL; @@ -277,6 +278,16 @@ int drm_irq_uninstall(struct drm_device * dev) dev->irq_enabled = 0; mutex_unlock(&dev->struct_mutex); + /* + * Wake up any waiters so they don't hang. 
+ */ + spin_lock_irqsave(&dev->vbl_lock, irqflags); + for (i = 0; i < dev->num_crtcs; i++) { + DRM_WAKEUP(&dev->vbl_queue[i]); + dev->vblank_enabled[i] = 0; + } + spin_unlock_irqrestore(&dev->vbl_lock, irqflags); + if (!irq_enabled) return -EINVAL; @@ -652,8 +663,9 @@ int drm_wait_vblank(struct drm_device *dev, void *data, vblwait->request.sequence, crtc); dev->last_vblank_wait[crtc] = vblwait->request.sequence; DRM_WAIT_ON(ret, dev->vbl_queue[crtc], 3 * DRM_HZ, - ((drm_vblank_count(dev, crtc) - - vblwait->request.sequence) <= (1 << 23))); + (((drm_vblank_count(dev, crtc) - + vblwait->request.sequence) <= (1 << 23)) || + !dev->irq_enabled)); if (ret != -EINTR) { struct timeval now; -- cgit From 01e3eb82278bf45221fc38b391bc5ee0f6a314d6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 13:00:50 +0100 Subject: Revert "sched: improve preempt debugging" This reverts commit 7317d7b87edb41a9135e30be1ec3f7ef817c53dd. This has been reported (and bisected) by Alexey Zaytsev and Kamalesh Babulal to produce annoying warnings during bootup on both x86 and powerpc. kernel_locked() is not a valid test in IRQ context (we update the BKL's ->lock_depth and the preempt count separately and non-atomically), so we cannot put it into the generic preempt debugging checks which can run in IRQ contexts too. Reported-and-bisected-by: Alexey Zaytsev Reported-and-bisected-by: Kamalesh Babulal Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d..3b630d88266 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4440,7 +4440,7 @@ void __kprobes sub_preempt_count(int val) /* * Underflow? */ - if (DEBUG_LOCKS_WARN_ON(val > preempt_count() - (!!kernel_locked()))) + if (DEBUG_LOCKS_WARN_ON(val > preempt_count())) return; /* * Is the spinlock portion underflowing?
-- cgit From 3a03ac1a0223f779a3de313523408ddb099e5679 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 11 Jan 2009 09:03:49 +1000 Subject: drm/i915: setup sarea properly in master_priv If we are running DRI1 userspace, we really need to set the sarea up properly. thanks to Richard for finding/testing this. Signed-off-by: Richard Purdie Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 62a4bf7b49d..868f574363a 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -177,6 +177,14 @@ static int i915_initialize(struct drm_device * dev, drm_i915_init_t * init) drm_i915_private_t *dev_priv = dev->dev_private; struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv; + master_priv->sarea = drm_getsarea(dev); + if (master_priv->sarea) { + master_priv->sarea_priv = (drm_i915_sarea_t *) + ((u8 *)master_priv->sarea->handle + init->sarea_priv_offset); + } else { + DRM_DEBUG("sarea not found assuming DRI2 userspace\n"); + } + if (init->ring_size != 0) { if (dev_priv->ring.ring_obj != NULL) { i915_dma_cleanup(dev); -- cgit From 8476a6571005f9440adda08ca4d6c69c7f4db30b Mon Sep 17 00:00:00 2001 From: Erik Ekman Date: Tue, 30 Dec 2008 22:49:28 +0100 Subject: Wireless: Fix Kconfig fact error Raytheon cards use 2.4 GHz, not 2.4 MHz. See http://www.hpl.hp.com/personal/Jean_Tourrilhes/Linux/Linux.Wireless.drivers.html#Raylink Signed-off-by: Erik Ekman Signed-off-by: John W. Linville --- drivers/net/wireless/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig index ea543fcf268..e4f9f747de8 100644 --- a/drivers/net/wireless/Kconfig +++ b/drivers/net/wireless/Kconfig @@ -111,7 +111,7 @@ config WLAN_80211 lets you choose drivers. 
config PCMCIA_RAYCS - tristate "Aviator/Raytheon 2.4MHz wireless support" + tristate "Aviator/Raytheon 2.4GHz wireless support" depends on PCMCIA && WLAN_80211 select WIRELESS_EXT ---help--- -- cgit From d1b29405bd3590bc97c4d3ff2c9139ca55e56ccd Mon Sep 17 00:00:00 2001 From: Andrew Price Date: Fri, 2 Jan 2009 08:05:27 +0000 Subject: rt2x00: Fix radio LED type check Since "rt2x00: Fix LED state handling", rt2x00leds_led_radio wrongly checks that the LED type is LED_TYPE_ASSOC. This patch makes it check for LED_TYPE_RADIO once again. Signed-off-by: Andrew Price Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2x00leds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00leds.c b/drivers/net/wireless/rt2x00/rt2x00leds.c index 68f4e0fc35b..a0cd35b6beb 100644 --- a/drivers/net/wireless/rt2x00/rt2x00leds.c +++ b/drivers/net/wireless/rt2x00/rt2x00leds.c @@ -97,7 +97,7 @@ void rt2x00leds_led_assoc(struct rt2x00_dev *rt2x00dev, bool enabled) void rt2x00leds_led_radio(struct rt2x00_dev *rt2x00dev, bool enabled) { - if (rt2x00dev->led_radio.type == LED_TYPE_ASSOC) + if (rt2x00dev->led_radio.type == LED_TYPE_RADIO) rt2x00led_led_simple(&rt2x00dev->led_radio, enabled); } -- cgit From 51fb80fefe736db1182551fec6528d1ef095b0ea Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sat, 3 Jan 2009 12:45:12 -0600 Subject: p54usb: Fix to prevent SKB memory allocation errors with 4K page size On x86_64 architecture with 4K page size and SLUB debugging enabled, stress testing on p54usb has resulted in skb allocation failures of O(1) and extreme page fragmentation. Reducing rx_mtu fixes this problem by reducing the size of all receive skb allocations to be of O(0). This change does not impact performance in any way. Signed-off-by: Larry Finger Signed-off-by: John W. 
Linville --- drivers/net/wireless/p54/p54common.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 82354b974a0..06c64744df2 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -138,6 +138,7 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw) u8 *fw_version = NULL; size_t len; int i; + int maxlen; if (priv->rx_start) return 0; @@ -195,6 +196,16 @@ int p54_parse_firmware(struct ieee80211_hw *dev, const struct firmware *fw) else priv->rx_mtu = (size_t) 0x620 - priv->tx_hdr_len; + maxlen = priv->tx_hdr_len + /* USB devices */ + sizeof(struct p54_rx_data) + + 4 + /* rx alignment */ + IEEE80211_MAX_FRAG_THRESHOLD; + if (priv->rx_mtu > maxlen && PAGE_SIZE == 4096) { + printk(KERN_INFO "p54: rx_mtu reduced from %d " + "to %d\n", priv->rx_mtu, + maxlen); + priv->rx_mtu = maxlen; + } break; } case BR_CODE_EXPOSED_IF: -- cgit From 3be36ae223271f9c2cfbe7406846c8fdcd2f50c3 Mon Sep 17 00:00:00 2001 From: Stefan Lippers-Hollmann Date: Sun, 4 Jan 2009 01:10:49 +0100 Subject: rt2x00: add USB ID for the Linksys WUSB200. add USB ID for the Linksys WUSB200 Wireless-G Business USB Adapter to rt73usb. Signed-off-by: Stefan Lippers-Hollmann Cc: stable Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt73usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rt2x00/rt73usb.c b/drivers/net/wireless/rt2x00/rt73usb.c index d638a8a5937..96a8d69f879 100644 --- a/drivers/net/wireless/rt2x00/rt73usb.c +++ b/drivers/net/wireless/rt2x00/rt73usb.c @@ -2321,6 +2321,7 @@ static struct usb_device_id rt73usb_device_table[] = { /* Linksys */ { USB_DEVICE(0x13b1, 0x0020), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x13b1, 0x0023), USB_DEVICE_DATA(&rt73usb_ops) }, + { USB_DEVICE(0x13b1, 0x0028), USB_DEVICE_DATA(&rt73usb_ops) }, /* MSI */ { USB_DEVICE(0x0db0, 0x6877), USB_DEVICE_DATA(&rt73usb_ops) }, { USB_DEVICE(0x0db0, 0x6874), USB_DEVICE_DATA(&rt73usb_ops) }, -- cgit From 3ea96463156123cbfd09ac412012a87fef068830 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Sun, 4 Jan 2009 17:33:25 +0100 Subject: rt2x00: Fix TX short preamble detection The short preamble mode was not correctly detected during TX, rt2x00 used the rate->hw_value_short field but mac80211 is not using this field that way. Instead the flag IEEE80211_TX_RC_USE_SHORT_PREAMBLE should be used to determine if the frame should be send out using short preamble or not. Signed-off-by: Ivo van Doorn Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt2x00dev.c | 8 +++----- drivers/net/wireless/rt2x00/rt2x00lib.h | 11 ----------- drivers/net/wireless/rt2x00/rt2x00queue.c | 2 +- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00dev.c b/drivers/net/wireless/rt2x00/rt2x00dev.c index 6d92542fcf0..87c0f2c8307 100644 --- a/drivers/net/wireless/rt2x00/rt2x00dev.c +++ b/drivers/net/wireless/rt2x00/rt2x00dev.c @@ -807,13 +807,11 @@ static void rt2x00lib_rate(struct ieee80211_rate *entry, { entry->flags = 0; entry->bitrate = rate->bitrate; - entry->hw_value = rt2x00_create_rate_hw_value(index, 0); - entry->hw_value_short = entry->hw_value; + entry->hw_value =index; + entry->hw_value_short = index; - if (rate->flags & DEV_RATE_SHORT_PREAMBLE) { + if (rate->flags & DEV_RATE_SHORT_PREAMBLE) entry->flags |= IEEE80211_RATE_SHORT_PREAMBLE; - entry->hw_value_short |= rt2x00_create_rate_hw_value(index, 1); - } } static int rt2x00lib_probe_hw_modes(struct rt2x00_dev *rt2x00dev, diff --git a/drivers/net/wireless/rt2x00/rt2x00lib.h b/drivers/net/wireless/rt2x00/rt2x00lib.h index 03024327767..86cd26fbf76 100644 --- a/drivers/net/wireless/rt2x00/rt2x00lib.h +++ b/drivers/net/wireless/rt2x00/rt2x00lib.h @@ -52,22 +52,11 @@ struct rt2x00_rate { extern const struct rt2x00_rate rt2x00_supported_rates[12]; -static inline u16 rt2x00_create_rate_hw_value(const u16 index, - const u16 short_preamble) -{ - return (short_preamble << 8) | (index & 0xff); -} - static inline const struct rt2x00_rate *rt2x00_get_rate(const u16 hw_value) { return &rt2x00_supported_rates[hw_value & 0xff]; } -static inline int rt2x00_get_rate_preamble(const u16 hw_value) -{ - return (hw_value & 0xff00); -} - /* * Radio control handlers. 
*/ diff --git a/drivers/net/wireless/rt2x00/rt2x00queue.c b/drivers/net/wireless/rt2x00/rt2x00queue.c index eaec6bd93ed..746a8f36b93 100644 --- a/drivers/net/wireless/rt2x00/rt2x00queue.c +++ b/drivers/net/wireless/rt2x00/rt2x00queue.c @@ -313,7 +313,7 @@ static void rt2x00queue_create_tx_descriptor(struct queue_entry *entry, * When preamble is enabled we should set the * preamble bit for the signal. */ - if (rt2x00_get_rate_preamble(rate->hw_value)) + if (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE) txdesc->signal |= 0x08; } } -- cgit From 878e6a432f85690a2c0d88d96f177e54ff1d4a57 Mon Sep 17 00:00:00 2001 From: Michiel Date: Sun, 4 Jan 2009 17:22:28 -0600 Subject: p54usb: Add USB ID for Thomson Speedtouch 121g Add the USB ID for Thomson Speedtouch 121g to p54usb. Signed-off-by: Michiel Signed-off-by: Larry Finger Signed-off-by: John W. Linville --- drivers/net/wireless/p54/p54usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c index c44a200059d..8f5c063b854 100644 --- a/drivers/net/wireless/p54/p54usb.c +++ b/drivers/net/wireless/p54/p54usb.c @@ -56,6 +56,7 @@ static struct usb_device_id p54u_table[] __devinitdata = { {USB_DEVICE(0x050d, 0x7050)}, /* Belkin F5D7050 ver 1000 */ {USB_DEVICE(0x0572, 0x2000)}, /* Cohiba Proto board */ {USB_DEVICE(0x0572, 0x2002)}, /* Cohiba Proto board */ + {USB_DEVICE(0x06b9, 0x0121)}, /* Thomson SpeedTouch 121g */ {USB_DEVICE(0x0707, 0xee13)}, /* SMC 2862W-G version 2 */ {USB_DEVICE(0x083a, 0x4521)}, /* Siemens Gigaset USB Adapter 54 version 2 */ {USB_DEVICE(0x0846, 0x4240)}, /* Netgear WG111 (v2) */ -- cgit From 176ddc7dcfe3fd93778f52abf9a947d92932f19e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 5 Jan 2009 13:51:24 +0200 Subject: ath9k: Enforce module build if rfkill is a module CONFIG_ATH9K=y results in build issues if CONFIG_RFKILL=m since ath9k does not depend on rfkill in kconfig (i.e., CONFIG_RFKILL is used to select whether to enable 
rfkill in ath9k), but uses its functions if rfkill is enabled. Enforce ath9k to be build as a module if CONFIG_RFKILL=m to avoid this invalid configuration. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/ath9k/Kconfig b/drivers/net/wireless/ath9k/Kconfig index c43bd321f97..90a8dd87378 100644 --- a/drivers/net/wireless/ath9k/Kconfig +++ b/drivers/net/wireless/ath9k/Kconfig @@ -1,6 +1,7 @@ config ATH9K tristate "Atheros 802.11n wireless cards support" depends on PCI && MAC80211 && WLAN_80211 + depends on RFKILL || RFKILL=n select MAC80211_LEDS select LEDS_CLASS select NEW_LEDS -- cgit From 20953ad68ee522f6420b63c200ac9b23f96d937a Mon Sep 17 00:00:00 2001 From: David Kilroy Date: Wed, 7 Jan 2009 00:23:55 +0000 Subject: orinoco: take the driver lock in the rx tasklet Fix the warning reproduced below. We add to rx_list in interrupt context and remove elements in tasklet context. While removing elements we need to prevent the interrupt modifying the list. Note that "orinoco: Process bulk of receive interrupt in a tasklet" did not preserve locking semantics on what is now orinoco_rx. This patch reinstates the locking semantics and ensures it covers rx_list as well. This leads to additional cleanup required in free_orinocodev. [89479.105038] WARNING: at lib/list_debug.c:30 __list_add+0x8f/0xa0() [89479.105058] list_add corruption. prev->next should be next (dddb3568), but was cbc28978. (prev=dddb3568). [89479.106002] Pid: 15746, comm: X Not tainted 2.6.28-1avb #26 [89479.106020] Call Trace: [89479.106062] [] warn_slowpath+0x60/0x80 [89479.106104] [] ? native_sched_clock+0x20/0x70 [89479.106194] [] ? lock_release_holdtime+0x35/0x200 [89479.106218] [] ? __slab_alloc+0x550/0x560 [89479.106254] [] ? _spin_unlock+0x1d/0x20 [89479.106270] [] ? __slab_alloc+0x550/0x560 [89479.106302] [] ? delay_tsc+0x17/0x24 [89479.106319] [] ? 
__const_udelay+0x21/0x30 [89479.106376] [] ? hermes_bap_seek+0x112/0x1e0 [hermes] [89479.106396] [] ? trace_hardirqs_off+0xb/0x10 [89479.106418] [] ? __kmalloc_track_caller+0xb7/0x110 [89479.106448] [] ? dev_alloc_skb+0x1c/0x30 [89479.106465] [] ? dev_alloc_skb+0x1c/0x30 [89479.106482] [] __list_add+0x8f/0xa0 [89479.106551] [] orinoco_interrupt+0xcae/0x16c0 [orinoco] [89479.106574] [] ? tick_dev_program_event+0x33/0xb0 [89479.106594] [] ? native_sched_clock+0x20/0x70 [89479.106613] [] ? lock_release_holdtime+0x35/0x200 [89479.106662] [] ? trace_hardirqs_off+0xb/0x10 [89479.106892] [] ? usb_hcd_irq+0x97/0xa0 [usbcore] [89479.106926] [] handle_IRQ_event+0x29/0x60 [89479.106947] [] handle_level_irq+0x69/0xe0 [89479.106963] [] ? handle_level_irq+0x0/0xe0 [89479.106977] [] ? tcp_v4_rcv+0x633/0x6e0 [89479.107025] [] ? common_interrupt+0x28/0x30 [89479.107057] [] ? sk_run_filter+0x320/0x7a0 [89479.107078] [] ? list_del+0x21/0x90 [89479.107106] [] ? orinoco_rx_isr_tasklet+0x2ce/0x480 [orinoco] [89479.107131] [] ? __lock_acquire+0x160/0x1650 [89479.107151] [] ? native_sched_clock+0x20/0x70 [89479.107169] [] ? lock_release_holdtime+0x35/0x200 [89479.107200] [] ? irq_enter+0xa/0x60 [89479.107217] [] ? do_IRQ+0xd2/0x130 [89479.107518] [] ? restore_nocheck_notrace+0x0/0xe [89479.107542] [] ? __do_softirq+0x0/0x110 [89479.107561] [] ? trace_hardirqs_on_caller+0x74/0x140 [89479.107583] [] ? trace_hardirqs_on_thunk+0xc/0x10 [89479.107602] [] ? tasklet_action+0x27/0x90 [89479.107620] [] ? trace_hardirqs_on_caller+0x74/0x140 [89479.107638] [] ? tasklet_action+0x43/0x90 [89479.107655] [] ? __do_softirq+0x6f/0x110 [89479.107674] [] ? __do_softirq+0x0/0x110 [89479.107685] [] ? handle_level_irq+0x0/0xe0 [89479.107715] [] ? irq_exit+0x5d/0x80 [89479.107732] [] ? do_IRQ+0xd2/0x130 [89479.107747] [] ? sysenter_exit+0xf/0x16 [89479.107765] [] ? trace_hardirqs_on_caller+0xfd/0x140 [89479.107782] [] ? 
common_interrupt+0x28/0x30 [89479.107797] ---[ end trace a1fc0a52df4a729d ]--- Reported-by: Andrey Borzenkov Signed-off-by: David Kilroy Signed-off-by: John W. Linville --- drivers/net/wireless/orinoco/orinoco.c | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/orinoco/orinoco.c b/drivers/net/wireless/orinoco/orinoco.c index bc84e2792f8..c3bb85e0251 100644 --- a/drivers/net/wireless/orinoco/orinoco.c +++ b/drivers/net/wireless/orinoco/orinoco.c @@ -1610,6 +1610,16 @@ static void orinoco_rx_isr_tasklet(unsigned long data) struct orinoco_rx_data *rx_data, *temp; struct hermes_rx_descriptor *desc; struct sk_buff *skb; + unsigned long flags; + + /* orinoco_rx requires the driver lock, and we also need to + * protect priv->rx_list, so just hold the lock over the + * lot. + * + * If orinoco_lock fails, we've unplugged the card. In this + * case just abort. */ + if (orinoco_lock(priv, &flags) != 0) + return; /* extract desc and skb from queue */ list_for_each_entry_safe(rx_data, temp, &priv->rx_list, list) { @@ -1622,6 +1632,8 @@ static void orinoco_rx_isr_tasklet(unsigned long data) kfree(desc); } + + orinoco_unlock(priv, &flags); } /********************************************************************/ @@ -3645,12 +3657,22 @@ struct net_device void free_orinocodev(struct net_device *dev) { struct orinoco_private *priv = netdev_priv(dev); + struct orinoco_rx_data *rx_data, *temp; - /* No need to empty priv->rx_list: if the tasklet is scheduled - * when we call tasklet_kill it will run one final time, - * emptying the list */ + /* If the tasklet is scheduled when we call tasklet_kill it + * will run one final time. However the tasklet will only + * drain priv->rx_list if the hw is still available. 
*/ tasklet_kill(&priv->rx_tasklet); + /* Explicitly drain priv->rx_list */ + list_for_each_entry_safe(rx_data, temp, &priv->rx_list, list) { + list_del(&rx_data->list); + + dev_kfree_skb(rx_data->skb); + kfree(rx_data->desc); + kfree(rx_data); + } + unregister_pm_notifier(&priv->pm_notifier); orinoco_uncache_fw(priv); -- cgit From 86060f0d691f5ee1b4ef4efe770b683e54ac438d Mon Sep 17 00:00:00 2001 From: Sujith Date: Wed, 7 Jan 2009 14:25:29 +0530 Subject: ath9k: Fix chainmask handling bug The chainmasks have to be updated before setting the channel, since the HW reset routine uses them to set the appropriate registers. Signed-off-by: Sujith Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath9k/main.c b/drivers/net/wireless/ath9k/main.c index 191eec50dc7..727f067aca4 100644 --- a/drivers/net/wireless/ath9k/main.c +++ b/drivers/net/wireless/ath9k/main.c @@ -2164,13 +2164,13 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed) conf->ht.channel_type); } + ath_update_chainmask(sc, conf->ht.enabled); + if (ath_set_channel(sc, &sc->sc_ah->ah_channels[pos]) < 0) { DPRINTF(sc, ATH_DBG_FATAL, "Unable to set channel\n"); mutex_unlock(&sc->mutex); return -EINVAL; } - - ath_update_chainmask(sc, conf->ht.enabled); } if (changed & IEEE80211_CONF_CHANGE_POWER) -- cgit From d732129b25b972c208c9705759c8c64f63a21800 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 8 Jan 2009 10:20:00 -0800 Subject: iwlwifi: Fix get_cmd_string() for REPLY_3945_RX 0x1b is a 3945 specific command, we should print it too when debugging. Signed-off-by: Samuel Ortiz Signed-off-by: Reinette Chatre Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-hcmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/iwlwifi/iwl-hcmd.c b/drivers/net/wireless/iwlwifi/iwl-hcmd.c index 01a2169cece..8c71ad4f88c 100644 --- a/drivers/net/wireless/iwlwifi/iwl-hcmd.c +++ b/drivers/net/wireless/iwlwifi/iwl-hcmd.c @@ -51,6 +51,7 @@ const char *get_cmd_string(u8 cmd) IWL_CMD(REPLY_REMOVE_STA); IWL_CMD(REPLY_REMOVE_ALL_STA); IWL_CMD(REPLY_WEPKEY); + IWL_CMD(REPLY_3945_RX); IWL_CMD(REPLY_TX); IWL_CMD(REPLY_RATE_SCALE); IWL_CMD(REPLY_LEDS_CMD); -- cgit From 706ea9b66935e341b063d860c9c8f279b37b5578 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Fri, 9 Jan 2009 12:31:48 -0500 Subject: orinoco_cs: add ID for ARtem Onair Comcard 11 Reported by Michael Jarosch Signed-off-by: Pavel Roskin Signed-off-by: John W. Linville --- drivers/net/wireless/orinoco/orinoco_cs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/orinoco/orinoco_cs.c b/drivers/net/wireless/orinoco/orinoco_cs.c index f127602670e..0b32215d3f5 100644 --- a/drivers/net/wireless/orinoco/orinoco_cs.c +++ b/drivers/net/wireless/orinoco/orinoco_cs.c @@ -435,6 +435,7 @@ static struct pcmcia_device_id orinoco_cs_ids[] = { PCMCIA_DEVICE_MANF_CARD(0x0250, 0x0002), /* Samsung SWL2000-N 11Mb/s WLAN Card */ PCMCIA_DEVICE_MANF_CARD(0x0261, 0x0002), /* AirWay 802.11 Adapter (PCMCIA) */ PCMCIA_DEVICE_MANF_CARD(0x0268, 0x0001), /* ARtem Onair */ + PCMCIA_DEVICE_MANF_CARD(0x0268, 0x0003), /* ARtem Onair Comcard 11 */ PCMCIA_DEVICE_MANF_CARD(0x026f, 0x0305), /* Buffalo WLI-PCM-S11 */ PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1612), /* Linksys WPC11 Version 2.5 */ PCMCIA_DEVICE_MANF_CARD(0x0274, 0x1613), /* Linksys WPC11 Version 3 */ -- cgit From c1d34c1dad76be6d515ef33e24eb92f10547b08b Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 20 Dec 2008 02:21:37 +0100 Subject: p54: crypto offload fixes This patch fixes two small flaws: - restore the original TKIP IV if we altered it. 
- reserve & initialize ICV with zeros. This is actually only necessary for some obsolete p54usb firmwares. But we don't know yet, if all devices are compatible with the new revisions. Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- drivers/net/wireless/p54/p54common.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 06c64744df2..37294a657f8 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -809,6 +809,16 @@ static void p54_rx_frame_sent(struct ieee80211_hw *dev, struct sk_buff *skb) info->flags |= IEEE80211_TX_STAT_TX_FILTERED; info->status.ack_signal = p54_rssi_to_dbm(dev, (int)payload->ack_rssi); + + if (entry_data->key_type == P54_CRYPTO_TKIPMICHAEL) { + u8 *iv = (u8 *)(entry_data->align + pad + + entry_data->crypt_offset); + + /* Restore the original TKIP IV. */ + iv[2] = iv[0]; + iv[0] = iv[1]; + iv[1] = (iv[0] | 0x20) & 0x7f; /* WEPSeed - 8.3.2.2 */ + } skb_pull(entry, sizeof(*hdr) + pad + sizeof(*entry_data)); ieee80211_tx_status_irqsafe(dev, entry); goto out; @@ -1394,7 +1404,6 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->tries = ridx; txhdr->rts_rate_idx = 0; if (info->control.hw_key) { - crypt_offset += info->control.hw_key->iv_len; txhdr->key_type = p54_convert_algo(info->control.hw_key->alg); txhdr->key_len = min((u8)16, info->control.hw_key->keylen); memcpy(txhdr->key, info->control.hw_key->key, txhdr->key_len); @@ -1408,6 +1417,8 @@ static int p54_tx(struct ieee80211_hw *dev, struct sk_buff *skb) } /* reserve some space for ICV */ len += info->control.hw_key->icv_len; + memset(skb_put(skb, info->control.hw_key->icv_len), 0, + info->control.hw_key->icv_len); } else { txhdr->key_type = 0; txhdr->key_len = 0; -- cgit From 00627f229c9807e4cb825a7ce36b886e2adf2229 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 20 Dec 2008 02:21:56 
+0100 Subject: p54usb: fix random traffic stalls (LM87) All LM87 firmwares need an explicit termination "packet" in order to finish the pending transfer properly. Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- drivers/net/wireless/p54/p54usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c index 8f5c063b854..6a6a72f6f82 100644 --- a/drivers/net/wireless/p54/p54usb.c +++ b/drivers/net/wireless/p54/p54usb.c @@ -285,6 +285,7 @@ static void p54u_tx_lm87(struct ieee80211_hw *dev, struct sk_buff *skb) usb_fill_bulk_urb(data_urb, priv->udev, usb_sndbulkpipe(priv->udev, P54U_PIPE_DATA), skb->data, skb->len, p54u_tx_cb, skb); + data_urb->transfer_flags |= URB_ZERO_PACKET; usb_anchor_urb(data_urb, &priv->submitted); if (usb_submit_urb(data_urb, GFP_ATOMIC)) { -- cgit From d15cfc3ac77388f1d588c57743d5f26b15eba9a8 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Sat, 20 Dec 2008 11:00:23 +0100 Subject: rt2x00: Fix segementation fault The queue_end() macro points to 1 position after the queue, which means that if we want to know if queue is at the end of the queue we should first increment the position and then check if it is a valid entry. This fixes a segmentation fault which only occurs when the device has enough endpoints to provide a dedicated endpoint for all TX queues (which likely won't happen for rt2500usb and rt73usb, but will happen for rt2800usb). Signed-off-by: Ivo van Doorn Signed-off-by: John W. 
Linville --- drivers/net/wireless/rt2x00/rt2x00usb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2x00usb.c b/drivers/net/wireless/rt2x00/rt2x00usb.c index 83df312ac56..0b29d767a25 100644 --- a/drivers/net/wireless/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/rt2x00/rt2x00usb.c @@ -434,11 +434,11 @@ static int rt2x00usb_find_endpoints(struct rt2x00_dev *rt2x00dev) if (usb_endpoint_is_bulk_in(ep_desc)) { rt2x00usb_assign_endpoint(rt2x00dev->rx, ep_desc); - } else if (usb_endpoint_is_bulk_out(ep_desc)) { + } else if (usb_endpoint_is_bulk_out(ep_desc) && + (queue != queue_end(rt2x00dev))) { rt2x00usb_assign_endpoint(queue, ep_desc); + queue = queue_next(queue); - if (queue != queue_end(rt2x00dev)) - queue = queue_next(queue); tx_ep_desc = ep_desc; } } -- cgit From 1061787967db03975dc02030d6815811f4eb9231 Mon Sep 17 00:00:00 2001 From: Daniel Wu Date: Sat, 20 Dec 2008 10:53:29 -0800 Subject: iwlwifi: Fix typo in iwl-commands.h for CCK rate bit range. My first (minor) patch, hopefully this is correct. Fix a typo in iwl-commands.h for CCK rates which needs 7 bits and not 4. Signed-off-by: Daniel Wu Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-commands.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-commands.h b/drivers/net/wireless/iwlwifi/iwl-commands.h index 52966ffbef6..ba997204c8d 100644 --- a/drivers/net/wireless/iwlwifi/iwl-commands.h +++ b/drivers/net/wireless/iwlwifi/iwl-commands.h @@ -255,7 +255,7 @@ struct iwl_cmd_header { * 0x3) 54 Mbps * * Legacy CCK rate format for bits 7:0 (bit 8 must be "0", bit 9 "1"): - * 3-0: 10) 1 Mbps + * 6-0: 10) 1 Mbps * 20) 2 Mbps * 55) 5.5 Mbps * 110) 11 Mbps -- cgit From b55eae3349ff5d6d088c7ab0151260d5e3dbd26d Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 21 Dec 2008 15:40:33 -0600 Subject: rtl8180: Fix to add STA mode To be compatible with mac80211 following "mac80211: only create default STA interface if supported", rtl8180 needs to set NL80211_IFTYPE_STATION in interface_modes. Signed-off-by: Larry Finger Reported-by: Fabio Rossi Tested-by: Piter PUNK Acked-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/rtl818x/rtl8180_dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c index 5f887fb137a..387c133ec0f 100644 --- a/drivers/net/wireless/rtl818x/rtl8180_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c @@ -897,6 +897,7 @@ static int __devinit rtl8180_probe(struct pci_dev *pdev, dev->flags = IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING | IEEE80211_HW_RX_INCLUDES_FCS | IEEE80211_HW_SIGNAL_UNSPEC; + dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION); dev->queues = 1; dev->max_signal = 65; -- cgit From f3d340c1d536fd3e5a104c99ac9c3f8694270d72 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Sun, 21 Dec 2008 23:19:17 +0100 Subject: Fix rt2500usb HW crypto: TKIP rt2500usb doesn't strip the IV/ICV data from received frames, so we don't need to set the RX_FLAG_IV_STRIPPED flag. 
We do need to set the RX_FLAG_MMIC_STRIPPED flag for all encryption types since the MMIC has been removed from the frame. After this patch TKIP Hardware crypto works for rt2500usb. WEP and AES are still failing. Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2500usb.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 30028e2422f..065f111f01f 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -376,11 +376,11 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, /* * The driver does not support the IV/EIV generation - * in hardware. However it doesn't support the IV/EIV - * inside the ieee80211 frame either, but requires it - * to be provided seperately for the descriptor. - * rt2x00lib will cut the IV/EIV data out of all frames - * given to us by mac80211, but we must tell mac80211 + * in hardware. However it demands the data to be provided + * both seperately as well as inside the frame. + * We already provided the CONFIG_CRYPTO_COPY_IV to rt2x00lib + * to ensure rt2x00lib will not strip the data from the + * frame after the copy, now we must tell mac80211 * to generate the IV/EIV data. */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; @@ -1334,14 +1334,7 @@ static void rt2500usb_fill_rxdone(struct queue_entry *entry, /* ICV is located at the end of frame */ - /* - * Hardware has stripped IV/EIV data from 802.11 frame during - * decryption. It has provided the data seperately but rt2x00lib - * should decide if it should be reinserted. 
- */ - rxdesc->flags |= RX_FLAG_IV_STRIPPED; - if (rxdesc->cipher != CIPHER_TKIP) - rxdesc->flags |= RX_FLAG_MMIC_STRIPPED; + rxdesc->flags |= RX_FLAG_MMIC_STRIPPED; if (rxdesc->cipher_status == RX_CRYPTO_SUCCESS) rxdesc->flags |= RX_FLAG_DECRYPTED; else if (rxdesc->cipher_status == RX_CRYPTO_FAIL_MIC) -- cgit From d3a1db1c67735063921d9186145fc86164cf9781 Mon Sep 17 00:00:00 2001 From: Senthil Balasubramanian Date: Mon, 22 Dec 2008 16:31:58 +0530 Subject: ath9k: Fix incorrect sequence numbering for unaggregated QoS Frame. This patch fixes an issue with the sequence numbers of unaggregated QoS frames, because of which the frames are handled in a different order at the AP and resulted in MLME REPLAYFAILURE. Signed-off-by: Senthil Balasubramanian Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/xmit.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index 3bfc3b90f25..1ea9428c0cd 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -264,25 +264,22 @@ static void assign_aggr_tid_seqno(struct sk_buff *skb, } /* Get seqno */ - - if (ieee80211_is_data(fc) && !is_pae(skb)) { - /* For HT capable stations, we save tidno for later use. - * We also override seqno set by upper layer with the one - * in tx aggregation state. - * - * If fragmentation is on, the sequence number is - * not overridden, since it has been - * incremented by the fragmentation routine. - * - * FIXME: check if the fragmentation threshold exceeds - * IEEE80211 max. - */ - tid = ATH_AN_2_TID(an, bf->bf_tidno); - hdr->seq_ctrl = cpu_to_le16(tid->seq_next << - IEEE80211_SEQ_SEQ_SHIFT); - bf->bf_seqno = tid->seq_next; - INCR(tid->seq_next, IEEE80211_SEQ_MAX); - } + /* For HT capable stations, we save tidno for later use. + * We also override seqno set by upper layer with the one + * in tx aggregation state. 
+ * + * If fragmentation is on, the sequence number is + * not overridden, since it has been + * incremented by the fragmentation routine. + * + * FIXME: check if the fragmentation threshold exceeds + * IEEE80211 max. + */ + tid = ATH_AN_2_TID(an, bf->bf_tidno); + hdr->seq_ctrl = cpu_to_le16(tid->seq_next << + IEEE80211_SEQ_SEQ_SHIFT); + bf->bf_seqno = tid->seq_next; + INCR(tid->seq_next, IEEE80211_SEQ_MAX); } static int setup_tx_flags(struct ath_softc *sc, struct sk_buff *skb, @@ -1718,11 +1715,10 @@ static int ath_tx_setup_buffer(struct ath_softc *sc, struct ath_buf *bf, /* Assign seqno, tidno */ - if (bf_isht(bf) && (sc->sc_flags & SC_OP_TXAGGR)) + if (ieee80211_is_data_qos(fc) && (sc->sc_flags & SC_OP_TXAGGR)) assign_aggr_tid_seqno(skb, bf); /* DMA setup */ - bf->bf_mpdu = skb; bf->bf_dmacontext = pci_map_single(sc->pdev, skb->data, -- cgit From d57854bb1d78ba89ffbfdfd1c3e95b52ed7478ff Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Mon, 22 Dec 2008 15:35:31 +0100 Subject: minstrel: fix warning if lowest supported rate index is not 0 This patch fixes the following WARNING (caused by rix_to_ndx): " >WARNING: at net/mac80211/rc80211_minstrel.c:69 minstrel_rate_init+0xd2/0x33a [mac80211]() >[...] >Call Trace: > warn_on_slowpath+0x51/0x75 > _format_mac_addr+0x4c/0x88 > minstrel_rate_init+0xd2/0x33a [mac80211] > print_mac+0x16/0x1b > schedule_hrtimeout_range+0xdc/0x107 > ieee80211_add_station+0x158/0x1bd [mac80211] > nl80211_new_station+0x1b3/0x20b [cfg80211] The reason is that I'm experimenting with "g" only mode on a 802.11 b/g card. Therefore rate_lowest_index returns 4 (= 6Mbit, instead of usual 0 = 1Mbit). Since mi->r array is initialized with zeros in minstrel_alloc_sta, rix_to_ndx has a hard time to find the 6Mbit entry and will trigged the WARNING. Signed-off-by: Christian Lamparter Acked-by: Felix Fietkau Signed-off-by: John W. 
Linville --- net/mac80211/rc80211_minstrel.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index 2b3b490a607..3824990d340 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -395,13 +395,15 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, { struct minstrel_sta_info *mi = priv_sta; struct minstrel_priv *mp = priv; - struct minstrel_rate *mr_ctl; + struct ieee80211_local *local = hw_to_local(mp->hw); + struct ieee80211_rate *ctl_rate; unsigned int i, n = 0; unsigned int t_slot = 9; /* FIXME: get real slot time */ mi->lowest_rix = rate_lowest_index(sband, sta); - mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)]; - mi->sp_ack_dur = mr_ctl->ack_time; + ctl_rate = &sband->bitrates[mi->lowest_rix]; + mi->sp_ack_dur = ieee80211_frame_duration(local, 10, ctl_rate->bitrate, + !!(ctl_rate->flags & IEEE80211_RATE_ERP_G), 1); for (i = 0; i < sband->n_bitrates; i++) { struct minstrel_rate *mr = &mi->r[n]; @@ -416,7 +418,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, mr->rix = i; mr->bitrate = sband->bitrates[i].bitrate / 5; - calc_rate_durations(mi, hw_to_local(mp->hw), mr, + calc_rate_durations(mi, local, mr, &sband->bitrates[i]); /* calculate maximum number of retransmissions before -- cgit From 157ec8768457e8177d281ae099fb1c321c9a16d7 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 22 Dec 2008 16:45:54 +0200 Subject: ath9k: Revert fix to TX status reporting for retries and MCS index This patch reverts "ath9k: Fix TX status reporting for retries and MCS index" because that change ended up breaking ath9k rate control. 
While the MCS index reporting to mac80211 was indeed fixed by the patch, it did not take into account that the ath9k rate control algorithm was updating private tables based on this index and the index comes through the rate control API call, i.e., based on mac80211 TX status call. In addition, it looks like the "fix" to remove +1 from TX status 'count' field was not correct based on ieee80211_tx_status() implementation that counts the total of count values, but starting from -1, not 0. The TX status reporting for frames using MCS needs to be fixed somehow, but it does not look like there is any easy fix for the ath9k rate control algorithm, so the best option now seems to be to revert the change and bring it back once the rate control code is cleaned up to handle this better. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- drivers/net/wireless/ath9k/xmit.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/net/wireless/ath9k/xmit.c b/drivers/net/wireless/ath9k/xmit.c index 1ea9428c0cd..c92f0c6e4ad 100644 --- a/drivers/net/wireless/ath9k/xmit.c +++ b/drivers/net/wireless/ath9k/xmit.c @@ -126,15 +126,7 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_info->flags |= IEEE80211_TX_STAT_ACK; } - tx_info->status.rates[0].count = tx_status->retries; - if (tx_info->status.rates[0].flags & IEEE80211_TX_RC_MCS) { - /* Change idx from internal table index to MCS index */ - int idx = tx_info->status.rates[0].idx; - struct ath_rate_table *rate_table = sc->cur_rate_table; - if (idx >= 0 && idx < rate_table->rate_cnt) - tx_info->status.rates[0].idx = - rate_table->info[idx].ratecode & 0x7f; - } + tx_info->status.rates[0].count = tx_status->retries + 1; hdrlen = ieee80211_get_hdrlen_from_skb(skb); padsize = hdrlen & 3; -- cgit From 4fb7404e0eaf574c00d01d2b1ce2615229b350cd Mon Sep 17 00:00:00 2001 From: Steve Brown Date: Tue, 23 Dec 2008 07:57:05 -0500 Subject: ath5k: Correct usage of AR5K_CFG_ADHOC This 
corrects usage of AR5K_CFG_ADHOC introduced in "ath5k: Update PCU code". Also, the name of the indicator is changed to AR5K_CFG_IBSS to more accurately reflect its function. This change restores beaconing in AP and mesh modes. Signed-off-by: Steve Brown Signed-off-by: John W. Linville --- drivers/net/wireless/ath5k/pcu.c | 4 ++-- drivers/net/wireless/ath5k/reg.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/ath5k/pcu.c b/drivers/net/wireless/ath5k/pcu.c index 0cac05c6a9c..75eb9f43c74 100644 --- a/drivers/net/wireless/ath5k/pcu.c +++ b/drivers/net/wireless/ath5k/pcu.c @@ -65,7 +65,7 @@ int ath5k_hw_set_opmode(struct ath5k_hw *ah) if (ah->ah_version == AR5K_AR5210) pcu_reg |= AR5K_STA_ID1_NO_PSPOLL; else - AR5K_REG_DISABLE_BITS(ah, AR5K_CFG, AR5K_CFG_ADHOC); + AR5K_REG_ENABLE_BITS(ah, AR5K_CFG, AR5K_CFG_IBSS); break; case NL80211_IFTYPE_AP: @@ -75,7 +75,7 @@ int ath5k_hw_set_opmode(struct ath5k_hw *ah) if (ah->ah_version == AR5K_AR5210) pcu_reg |= AR5K_STA_ID1_NO_PSPOLL; else - AR5K_REG_ENABLE_BITS(ah, AR5K_CFG, AR5K_CFG_ADHOC); + AR5K_REG_DISABLE_BITS(ah, AR5K_CFG, AR5K_CFG_IBSS); break; case NL80211_IFTYPE_STATION: diff --git a/drivers/net/wireless/ath5k/reg.h b/drivers/net/wireless/ath5k/reg.h index 91aaeaf8819..9189ab13286 100644 --- a/drivers/net/wireless/ath5k/reg.h +++ b/drivers/net/wireless/ath5k/reg.h @@ -73,7 +73,7 @@ #define AR5K_CFG_SWRD 0x00000004 /* Byte-swap RX descriptor */ #define AR5K_CFG_SWRB 0x00000008 /* Byte-swap RX buffer */ #define AR5K_CFG_SWRG 0x00000010 /* Byte-swap Register access */ -#define AR5K_CFG_ADHOC 0x00000020 /* AP/Adhoc indication [5211+] */ +#define AR5K_CFG_IBSS 0x00000020 /* 0-BSS, 1-IBSS [5211+] */ #define AR5K_CFG_PHY_OK 0x00000100 /* [5211+] */ #define AR5K_CFG_EEBS 0x00000200 /* EEPROM is busy */ #define AR5K_CFG_CLKGD 0x00000400 /* Clock gated (Disable dynamic clock) */ -- cgit From dcebf45cdc8384be9009b2b9a825054b64742768 Mon Sep 17 00:00:00 2001 From: Pavel Roskin Date: Mon, 
22 Dec 2008 16:39:36 -0500 Subject: mac80211: allow mode change if IBSS is not allowed Changing mode on an interface is not allowed if IBSS is disabled for the current channel. That restriction should only apply when switching to the ad-hoc mode, as it was prior to "cfg80211: handle SIOCGIWMODE/SIOCSIWMODE". Signed-off-by: Pavel Roskin Signed-off-by: John W. Linville --- net/mac80211/iface.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5abbc3f07dd..b9074824862 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -699,7 +699,8 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, return 0; /* Setting ad-hoc mode on non-IBSS channel is not supported. */ - if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS) + if (sdata->local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS && + type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; /* -- cgit From 124b68e755c2ef9342d5d477142c499fd7901360 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 26 Dec 2008 19:09:45 +0100 Subject: p54: fix WARN_ON at line 2247 of net/mac80211/rx.c This patch hopefully fixes a mac80211<->p54 interaction problem, which was described by Larry Finger (ref: http://marc.info/?l=linux-wireless&m=123009889327707 ) I guess the warning was triggered by pending frames in the receive queue, while we're doing a band change 5GHz. Signed-off-by: Christian Lamparter Signed-off-by: John W. 
Linville --- drivers/net/wireless/p54/p54common.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index 37294a657f8..cba89ed0f57 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -586,6 +586,7 @@ static int p54_rx_data(struct ieee80211_hw *dev, struct sk_buff *skb) u16 freq = le16_to_cpu(hdr->freq); size_t header_len = sizeof(*hdr); u32 tsf32; + u8 rate = hdr->rate & 0xf; /* * If the device is in a unspecified state we have to @@ -614,8 +615,11 @@ static int p54_rx_data(struct ieee80211_hw *dev, struct sk_buff *skb) rx_status.qual = (100 * hdr->rssi) / 127; if (hdr->rate & 0x10) rx_status.flag |= RX_FLAG_SHORTPRE; - rx_status.rate_idx = (dev->conf.channel->band == IEEE80211_BAND_2GHZ ? - hdr->rate : (hdr->rate - 4)) & 0xf; + if (dev->conf.channel->band == IEEE80211_BAND_5GHZ) + rx_status.rate_idx = (rate < 4) ? 0 : rate - 4; + else + rx_status.rate_idx = rate; + rx_status.freq = freq; rx_status.band = dev->conf.channel->band; rx_status.antenna = hdr->antenna; -- cgit From d6e2be988d5146d1faa8df895cd8b32106d987bd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 5 Jan 2009 23:11:26 -0600 Subject: rtl8187: Fix module so that rmmod/insmod does not error Due to misunderstanding of the returned values allowed for the tx callback of mac80211, rtl8187 was using skb's that had been freed. This problem was triggered when the module was subjected to a rmmod/insmod cycle. After that was fixed, the modules would not work after the rmmod/insmod cycle until the USB device was reset. Signed-off-by: Johannes Berg Signed-off-by: Larry Finger Signed-off-by: John W. 
Linville --- drivers/net/wireless/rtl818x/rtl8187_dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c index 00ce3ef39ab..6ad6bac3770 100644 --- a/drivers/net/wireless/rtl818x/rtl8187_dev.c +++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c @@ -213,7 +213,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { kfree_skb(skb); - return -ENOMEM; + return NETDEV_TX_OK; } flags = skb->len; @@ -281,7 +281,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) } usb_free_urb(urb); - return rc; + return NETDEV_TX_OK; } static void rtl8187_rx_cb(struct urb *urb) @@ -1471,6 +1471,7 @@ static void __devexit rtl8187_disconnect(struct usb_interface *intf) ieee80211_unregister_hw(dev); priv = dev->priv; + usb_reset_device(priv->udev); usb_put_dev(interface_to_usbdev(intf)); ieee80211_free_hw(dev); } -- cgit From 71ef99c8b79ab07e1c79794085481464f9870d62 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 5 Jan 2009 20:46:34 -0500 Subject: ath5k: fix return values from ath5k_tx Should return NETDEV_TX_{OK,BUSY} instead of 0,-1 (this doesn't change any current functionality). Changes-licensed-under: 3-Clause-BSD Reported-by: Johannes Berg Signed-off-by: Bob Copeland Signed-off-by: John W. 
Linville --- drivers/net/wireless/ath5k/base.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath5k/base.c b/drivers/net/wireless/ath5k/base.c index 4af2607deec..8ef87356e08 100644 --- a/drivers/net/wireless/ath5k/base.c +++ b/drivers/net/wireless/ath5k/base.c @@ -2644,7 +2644,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb) if (skb_headroom(skb) < padsize) { ATH5K_ERR(sc, "tx hdrlen not %%4: %d not enough" " headroom to pad %d\n", hdrlen, padsize); - return -1; + return NETDEV_TX_BUSY; } skb_push(skb, padsize); memmove(skb->data, skb->data+padsize, hdrlen); @@ -2655,7 +2655,7 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb) ATH5K_ERR(sc, "no further txbuf available, dropping packet\n"); spin_unlock_irqrestore(&sc->txbuflock, flags); ieee80211_stop_queue(hw, skb_get_queue_mapping(skb)); - return -1; + return NETDEV_TX_BUSY; } bf = list_first_entry(&sc->txbuf, struct ath5k_buf, list); list_del(&bf->list); @@ -2673,10 +2673,10 @@ ath5k_tx(struct ieee80211_hw *hw, struct sk_buff *skb) sc->txbuf_len++; spin_unlock_irqrestore(&sc->txbuflock, flags); dev_kfree_skb_any(skb); - return 0; + return NETDEV_TX_OK; } - return 0; + return NETDEV_TX_OK; } static int -- cgit From b973c31a925c6753d84a100673f6b25546ec8b34 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Sat, 27 Dec 2008 22:19:49 +0100 Subject: mac80211: initialize RC data for all mesh links upon allocation This patch fixes a panic that might occur, if the device is part of a mesh and tries to send with a higher rate index than "0". kernel BUG at net/mac80211/rate.c:239! invalid opcode: 0000 [#1] SMP [...] Call Trace: <0> ? invoke_tx_handlers+0x474/0xb57 [mac80211] ? __ieee80211_tx_prepare+0x260/0x2a8 [mac80211] ? ieee80211_master_start_xmit+0x300/0x43a [mac80211] ? __qdisc_run+0xde/0x1da ? net_tx_action+0xb4/0x102 Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Christian Lamparter Signed-off-by: John W. 
Linville --- net/mac80211/mesh_plink.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 929ba542fd7..1159bdb4119 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -107,6 +107,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, sta->flags = WLAN_STA_AUTHORIZED; sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + rate_control_rate_init(sta); return sta; } -- cgit From f1dd2b23badfe8a28910a78be24452c627c4b6f2 Mon Sep 17 00:00:00 2001 From: Ivo van Doorn Date: Sat, 3 Jan 2009 16:27:14 +0100 Subject: rt2x00: Fix rt2500usb HW crypto: WEP 128 & AES The TXD_W0_CIPHER field is a 1-bit field. It only acts as boolean value to indicate if the frame must be encrypted or not. The way rt2x00_set_field32() worked it would grab the least significant bit from txdesc->cipher and use that as value. Because of that WEP 64 and TKIP worked since they had odd-numbered values, while WEP 128 and AES were even numbers and didn't work. Correctly booleanize the txdesc->cipher value to allow the hardware to encrypt the outgoing data. After this we can enable HW crypto by default again. Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- drivers/net/wireless/rt2x00/rt2500usb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index 065f111f01f..af6b5847be5 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -38,7 +38,7 @@ /* * Allow hardware encryption to be disabled. 
*/ -static int modparam_nohwcrypt = 1; +static int modparam_nohwcrypt = 0; module_param_named(nohwcrypt, modparam_nohwcrypt, bool, S_IRUGO); MODULE_PARM_DESC(nohwcrypt, "Disable hardware encryption."); @@ -1181,7 +1181,7 @@ static void rt2500usb_write_tx_desc(struct rt2x00_dev *rt2x00dev, test_bit(ENTRY_TXD_FIRST_FRAGMENT, &txdesc->flags)); rt2x00_set_field32(&word, TXD_W0_IFS, txdesc->ifs); rt2x00_set_field32(&word, TXD_W0_DATABYTE_COUNT, skb->len); - rt2x00_set_field32(&word, TXD_W0_CIPHER, txdesc->cipher); + rt2x00_set_field32(&word, TXD_W0_CIPHER, !!txdesc->cipher); rt2x00_set_field32(&word, TXD_W0_KEY_ID, txdesc->key_idx); rt2x00_desc_write(txd, 0, word); } -- cgit From 51e99158d261a5ec5772ca89b935c3daa270b07c Mon Sep 17 00:00:00 2001 From: Andrey Yurovsky Date: Mon, 5 Jan 2009 14:37:31 -0800 Subject: libertas_tf: return NETDEV_TX_OK in TX op The TX op should return NETDEV_TX_OK or NETDEV_TX_BUSY. Signed-off-by: Andrey Yurovsky Signed-off-by: John W. Linville --- drivers/net/wireless/libertas_tf/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c index d1fc305de5f..e7289e2e7f1 100644 --- a/drivers/net/wireless/libertas_tf/main.c +++ b/drivers/net/wireless/libertas_tf/main.c @@ -206,7 +206,7 @@ static int lbtf_op_tx(struct ieee80211_hw *hw, struct sk_buff *skb) * there are no buffered multicast frames to send */ ieee80211_stop_queues(priv->hw); - return 0; + return NETDEV_TX_OK; } static void lbtf_tx_work(struct work_struct *work) -- cgit From e16459c6b7e9c1390020a3e2a033b5383d1c4f3b Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 2 Jan 2009 10:46:04 +0000 Subject: powerpc: Make dummy section a valid note header We are declaring the dummy section (used to work around a binutils bug) as PT_NOTE, but we don't have enough bytes for it to be a valid note header, and kexec userspace complains: Warning: Elf Note name is not null terminated Warning: append= 
option is not passed. Using the first kernel root partition Warning: Elf Note name is not null terminated Instead of using the arbitrary value 0xf177 (aka "fill"), declare a no-name no-description note of type 0. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/vmlinux.lds.S | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 47bf15cd2c9..161b9b9691f 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -87,7 +87,9 @@ SECTIONS /* The dummy segment contents for the bug workaround mentioned above near PHDRS. */ .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) { - LONG(0xf177) + LONG(0) + LONG(0) + LONG(0) } :kernel :dummy /* -- cgit From 66c721e184e594d5761c5db804ade08fab81930d Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Fri, 2 Jan 2009 10:46:15 +0000 Subject: powerpc/kexec: Check crash_base for relocatable kernel Enforce that the crash kernel region never overlaps the current kernel, as it will be written directly on kexec load. Also, default to the previous KDUMP_KERNELBASE if the start is 0. Other architectures (x86, ia64) state that specifying the start address 0 (or omitting it) will result in the kernel allocating it. Before the relocatable patch in 2.6.28, powerpc would adjust any other start value to the hardcoded KDUMP_KERNELBASE of 32M. 
Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/machine_kexec.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c index b3abebb7ee6..d59e2b1bdcb 100644 --- a/arch/powerpc/kernel/machine_kexec.c +++ b/arch/powerpc/kernel/machine_kexec.c @@ -93,10 +93,35 @@ void __init reserve_crashkernel(void) KDUMP_KERNELBASE); crashk_res.start = KDUMP_KERNELBASE; +#else + if (!crashk_res.start) { + /* + * unspecified address, choose a region of specified size + * can overlap with initrd (ignoring corruption when retained) + * ppc64 requires kernel and some stacks to be in first segemnt + */ + crashk_res.start = KDUMP_KERNELBASE; + } + + crash_base = PAGE_ALIGN(crashk_res.start); + if (crash_base != crashk_res.start) { + printk("Crash kernel base must be aligned to 0x%lx\n", + PAGE_SIZE); + crashk_res.start = crash_base; + } + #endif crash_size = PAGE_ALIGN(crash_size); crashk_res.end = crashk_res.start + crash_size - 1; + /* The crash region must not overlap the current kernel */ + if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { + printk(KERN_WARNING + "Crash kernel can not overlap current kernel\n"); + crashk_res.start = crashk_res.end = 0; + return; + } + /* Crash kernel trumps memory limit */ if (memory_limit && memory_limit <= crashk_res.end) { memory_limit = crashk_res.end + 1; -- cgit From fe333321e2a71f706b794d55b6a3dcb5ab240f65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Jan 2009 14:26:03 +0000 Subject: powerpc: Change u64/s64 to a long long integer type Convert arch/powerpc/ over to long long based u64: -#ifdef __powerpc64__ -# include <asm-generic/int-l64.h> -#else -# include <asm-generic/int-ll64.h> -#endif +#include <asm-generic/int-ll64.h> This will avoid recurring spurious warnings in core kernel code that comes when people test on their own hardware. (i.e. x86 in ~98% of the cases) This is what x86 uses and it generally helps keep 64-bit code 32-bit clean too. 
[Adjusted to not impact user mode (from paulus) - sfr] Signed-off-by: Ingo Molnar Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 2 +- arch/powerpc/include/asm/types.h | 7 +++++- arch/powerpc/kernel/dma-iommu.c | 4 +-- arch/powerpc/kernel/iommu.c | 12 ++++----- arch/powerpc/kernel/lparcfg.c | 10 ++++---- arch/powerpc/kernel/setup_64.c | 6 ++--- arch/powerpc/mm/stab.c | 4 +-- arch/powerpc/oprofile/op_model_pa6t.c | 6 ++--- arch/powerpc/platforms/cell/beat_interrupt.c | 2 +- arch/powerpc/platforms/cell/celleb_scc_epci.c | 4 +-- arch/powerpc/platforms/cell/iommu.c | 4 +-- arch/powerpc/platforms/cell/ras.c | 8 +++--- arch/powerpc/platforms/cell/spu_base.c | 4 +-- arch/powerpc/platforms/cell/spu_callbacks.c | 2 +- arch/powerpc/platforms/iseries/iommu.c | 4 +-- arch/powerpc/platforms/pseries/iommu.c | 35 +++++++++++++-------------- arch/powerpc/sysdev/mpic.c | 2 +- drivers/net/pasemi_mac.c | 6 ++--- drivers/pcmcia/electra_cf.c | 2 +- drivers/scsi/ibmvscsi/ibmvscsi.c | 12 ++++----- 20 files changed, 70 insertions(+), 66 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index e0175beb446..0aa0315fb7e 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -18,7 +18,7 @@ */ #define RTAS_UNKNOWN_SERVICE (-1) -#define RTAS_INSTANTIATE_MAX (1UL<<30) /* Don't instantiate rtas at/above this value */ +#define RTAS_INSTANTIATE_MAX (1ULL<<30) /* Don't instantiate rtas at/above this value */ /* Buffer size for ppc_rtas system call. 
*/ #define RTAS_RMOBUF_MAX (64 * 1024) diff --git a/arch/powerpc/include/asm/types.h b/arch/powerpc/include/asm/types.h index c004c13f291..7ce27a52bb3 100644 --- a/arch/powerpc/include/asm/types.h +++ b/arch/powerpc/include/asm/types.h @@ -1,7 +1,12 @@ #ifndef _ASM_POWERPC_TYPES_H #define _ASM_POWERPC_TYPES_H -#ifdef __powerpc64__ +/* + * This is here because we used to use l64 for 64bit powerpc + * and we don't want to impact user mode with our change to ll64 + * in the kernel. + */ +#if defined(__powerpc64__) && !defined(__KERNEL__) # include <asm-generic/int-l64.h> #else # include <asm-generic/int-ll64.h> diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 14183af1b3f..2983adac8cc 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -79,10 +79,10 @@ static int dma_iommu_dma_supported(struct device *dev, u64 mask) "Warning: IOMMU offset too big for device mask\n"); if (tbl) printk(KERN_INFO - "mask: 0x%08lx, table offset: 0x%08lx\n", + "mask: 0x%08llx, table offset: 0x%08lx\n", mask, tbl->it_offset); else - printk(KERN_INFO "mask: 0x%08lx, table unavailable\n", + printk(KERN_INFO "mask: 0x%08llx, table unavailable\n", mask); return 0; } else diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1bfa706b96e..fd51578e29d 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -239,12 +239,12 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, if (printk_ratelimit()) { printk(KERN_INFO "iommu_free: invalid entry\n"); printk(KERN_INFO "\tentry = 0x%lx\n", entry); - printk(KERN_INFO "\tdma_addr = 0x%lx\n", (u64)dma_addr); - printk(KERN_INFO "\tTable = 0x%lx\n", (u64)tbl); - printk(KERN_INFO "\tbus# = 0x%lx\n", (u64)tbl->it_busno); - printk(KERN_INFO "\tsize = 0x%lx\n", (u64)tbl->it_size); - printk(KERN_INFO "\tstartOff = 0x%lx\n", (u64)tbl->it_offset); - printk(KERN_INFO "\tindex = 0x%lx\n", (u64)tbl->it_index); + printk(KERN_INFO "\tdma_addr = 0x%llx\n", (u64)dma_addr); + 
printk(KERN_INFO "\tTable = 0x%llx\n", (u64)tbl); + printk(KERN_INFO "\tbus# = 0x%llx\n", (u64)tbl->it_busno); + printk(KERN_INFO "\tsize = 0x%llx\n", (u64)tbl->it_size); + printk(KERN_INFO "\tstartOff = 0x%llx\n", (u64)tbl->it_offset); + printk(KERN_INFO "\tindex = 0x%llx\n", (u64)tbl->it_index); WARN_ON(1); } return; diff --git a/arch/powerpc/kernel/lparcfg.c b/arch/powerpc/kernel/lparcfg.c index d051e8cbcd0..182e0f642f3 100644 --- a/arch/powerpc/kernel/lparcfg.c +++ b/arch/powerpc/kernel/lparcfg.c @@ -240,7 +240,7 @@ static void parse_ppp_data(struct seq_file *m) if (rc) return; - seq_printf(m, "partition_entitled_capacity=%ld\n", + seq_printf(m, "partition_entitled_capacity=%lld\n", ppp_data.entitlement); seq_printf(m, "group=%d\n", ppp_data.group_num); seq_printf(m, "system_active_processors=%d\n", @@ -265,7 +265,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.unallocated_weight); seq_printf(m, "capacity_weight=%d\n", ppp_data.weight); seq_printf(m, "capped=%d\n", ppp_data.capped); - seq_printf(m, "unallocated_capacity=%ld\n", + seq_printf(m, "unallocated_capacity=%lld\n", ppp_data.unallocated_entitlement); } @@ -509,10 +509,10 @@ static ssize_t update_ppp(u64 *entitlement, u8 *weight) } else return -EINVAL; - pr_debug("%s: current_entitled = %lu, current_weight = %u\n", + pr_debug("%s: current_entitled = %llu, current_weight = %u\n", __func__, ppp_data.entitlement, ppp_data.weight); - pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", __func__, new_entitled, new_weight); retval = plpar_hcall_norets(H_SET_PPP, new_entitled, new_weight); @@ -558,7 +558,7 @@ static ssize_t update_mpp(u64 *entitlement, u8 *weight) pr_debug("%s: current_entitled = %lu, current_weight = %u\n", __func__, mpp_data.entitled_mem, mpp_data.mem_weight); - pr_debug("%s: new_entitled = %lu, new_weight = %u\n", + pr_debug("%s: new_entitled = %llu, new_weight = %u\n", __func__, new_entitled, new_weight); rc = 
plpar_hcall_norets(H_SET_MPP, new_entitled, new_weight); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d8bd2161e73..2d34196bba8 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -434,8 +434,8 @@ void __init setup_system(void) printk("Starting Linux PPC64 %s\n", init_utsname()->version); printk("-----------------------------------------------------\n"); - printk("ppc64_pft_size = 0x%lx\n", ppc64_pft_size); - printk("physicalMemorySize = 0x%lx\n", lmb_phys_mem_size()); + printk("ppc64_pft_size = 0x%llx\n", ppc64_pft_size); + printk("physicalMemorySize = 0x%llx\n", lmb_phys_mem_size()); if (ppc64_caches.dline_size != 0x80) printk("ppc64_caches.dcache_line_size = 0x%x\n", ppc64_caches.dline_size); @@ -493,7 +493,7 @@ static void __init emergency_stack_init(void) * bringup, we need to get at them in real mode. This means they * must also be within the RMO region. */ - limit = min(0x10000000UL, lmb.rmo_size); + limit = min(0x10000000ULL, lmb.rmo_size); for_each_possible_cpu(i) { unsigned long sp; diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c index 60e6032a808..98cd1dc2ae7 100644 --- a/arch/powerpc/mm/stab.c +++ b/arch/powerpc/mm/stab.c @@ -251,8 +251,8 @@ void __init stabs_alloc(void) paca[cpu].stab_addr = newstab; paca[cpu].stab_real = virt_to_abs(newstab); - printk(KERN_INFO "Segment table for CPU %d at 0x%lx " - "virtual, 0x%lx absolute\n", + printk(KERN_INFO "Segment table for CPU %d at 0x%llx " + "virtual, 0x%llx absolute\n", cpu, paca[cpu].stab_addr, paca[cpu].stab_real); } } diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c index c40de461fd4..42f778dff91 100644 --- a/arch/powerpc/oprofile/op_model_pa6t.c +++ b/arch/powerpc/oprofile/op_model_pa6t.c @@ -132,7 +132,7 @@ static int pa6t_reg_setup(struct op_counter_config *ctr, for (pmc = 0; pmc < cur_cpu_spec->num_pmcs; pmc++) { /* counters are 40 bit. Move to cputable at some point? 
*/ reset_value[pmc] = (0x1UL << 39) - ctr[pmc].count; - pr_debug("reset_value for pmc%u inited to 0x%lx\n", + pr_debug("reset_value for pmc%u inited to 0x%llx\n", pmc, reset_value[pmc]); } @@ -177,7 +177,7 @@ static int pa6t_start(struct op_counter_config *ctr) oprofile_running = 1; - pr_debug("start on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); + pr_debug("start on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0); return 0; } @@ -193,7 +193,7 @@ static void pa6t_stop(void) oprofile_running = 0; - pr_debug("stop on cpu %d, mmcr0 %lx\n", smp_processor_id(), mmcr0); + pr_debug("stop on cpu %d, mmcr0 %llx\n", smp_processor_id(), mmcr0); } /* handle the perfmon overflow vector */ diff --git a/arch/powerpc/platforms/cell/beat_interrupt.c b/arch/powerpc/platforms/cell/beat_interrupt.c index 192a9350937..72254848a22 100644 --- a/arch/powerpc/platforms/cell/beat_interrupt.c +++ b/arch/powerpc/platforms/cell/beat_interrupt.c @@ -99,7 +99,7 @@ static void beatic_end_irq(unsigned int irq_plug) err = beat_downcount_of_interrupt(irq_plug); if (err != 0) { if ((err & 0xFFFFFFFF) != 0xFFFFFFF5) /* -11: wrong state */ - panic("Failed to downcount IRQ! Error = %16lx", err); + panic("Failed to downcount IRQ! 
Error = %16llx", err); printk(KERN_ERR "IRQ over-downcounted, plug %d\n", irq_plug); } diff --git a/arch/powerpc/platforms/cell/celleb_scc_epci.c b/arch/powerpc/platforms/cell/celleb_scc_epci.c index 08c285b10e3..48ec88a38a1 100644 --- a/arch/powerpc/platforms/cell/celleb_scc_epci.c +++ b/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -405,7 +405,7 @@ static int __init celleb_setup_epci(struct device_node *node, hose->cfg_addr = ioremap(r.start, (r.end - r.start + 1)); if (!hose->cfg_addr) goto error; - pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n", + pr_debug("EPCI: cfg_addr map 0x%016llx->0x%016lx + 0x%016llx\n", r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1)); if (of_address_to_resource(node, 2, &r)) @@ -413,7 +413,7 @@ static int __init celleb_setup_epci(struct device_node *node, hose->cfg_data = ioremap(r.start, (r.end - r.start + 1)); if (!hose->cfg_data) goto error; - pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n", + pr_debug("EPCI: cfg_data map 0x%016llx->0x%016lx + 0x%016llx\n", r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1)); hose->ops = &celleb_epci_ops; diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 88d94b59a7c..ee5033eddf0 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -855,7 +855,7 @@ static int __init cell_iommu_init_disabled(void) */ if (np && size < lmb_end_of_DRAM()) { printk(KERN_WARNING "iommu: force-enabled, dma window" - " (%ldMB) smaller than total memory (%ldMB)\n", + " (%ldMB) smaller than total memory (%lldMB)\n", size >> 20, lmb_end_of_DRAM() >> 20); return -ENODEV; } @@ -985,7 +985,7 @@ static void cell_dma_dev_setup_fixed(struct device *dev) addr = cell_iommu_get_fixed_address(dev) + dma_iommu_fixed_base; archdata->dma_data = (void *)addr; - dev_dbg(dev, "iommu: fixed addr = %lx\n", addr); + dev_dbg(dev, "iommu: fixed addr = %llx\n", addr); } static void insert_16M_pte(unsigned long 
addr, unsigned long *ptab, diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 7b4cefa2199..5f961c464cc 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -38,16 +38,16 @@ static void dump_fir(int cpu) /* Todo: do some nicer parsing of bits and based on them go down * to other sub-units FIRs and not only IIC */ - printk(KERN_ERR "Global Checkstop FIR : 0x%016lx\n", + printk(KERN_ERR "Global Checkstop FIR : 0x%016llx\n", in_be64(&pregs->checkstop_fir)); - printk(KERN_ERR "Global Recoverable FIR : 0x%016lx\n", + printk(KERN_ERR "Global Recoverable FIR : 0x%016llx\n", in_be64(&pregs->checkstop_fir)); - printk(KERN_ERR "Global MachineCheck FIR : 0x%016lx\n", + printk(KERN_ERR "Global MachineCheck FIR : 0x%016llx\n", in_be64(&pregs->spec_att_mchk_fir)); if (iregs == NULL) return; - printk(KERN_ERR "IOC FIR : 0x%016lx\n", + printk(KERN_ERR "IOC FIR : 0x%016llx\n", in_be64(&iregs->ioc_fir)); } diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index a5bdb89a17c..e487ad68ac1 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -151,7 +151,7 @@ static inline void spu_load_slb(struct spu *spu, int slbe, struct spu_slb *slb) { struct spu_priv2 __iomem *priv2 = spu->priv2; - pr_debug("%s: adding SLB[%d] 0x%016lx 0x%016lx\n", + pr_debug("%s: adding SLB[%d] 0x%016llx 0x%016llx\n", __func__, slbe, slb->vsid, slb->esid); out_be64(&priv2->slb_index_W, slbe); @@ -221,7 +221,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; - pr_debug("%s, %lx, %lx\n", __func__, dsisr, ea); + pr_debug("%s, %llx, %lx\n", __func__, dsisr, ea); /* * Handle kernel space hash faults immediately. 
User hash diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c b/arch/powerpc/platforms/cell/spu_callbacks.c index 19f6bfdbb93..fec1495e6b1 100644 --- a/arch/powerpc/platforms/cell/spu_callbacks.c +++ b/arch/powerpc/platforms/cell/spu_callbacks.c @@ -54,7 +54,7 @@ long spu_sys_callback(struct spu_syscall_block *s) long (*syscall)(u64 a1, u64 a2, u64 a3, u64 a4, u64 a5, u64 a6); if (s->nr_ret >= ARRAY_SIZE(spu_syscall_table)) { - pr_debug("%s: invalid syscall #%ld", __func__, s->nr_ret); + pr_debug("%s: invalid syscall #%lld", __func__, s->nr_ret); return -ENOSYS; } diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index bbe828f1b88..6ed75bffc8a 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -66,7 +66,7 @@ static int tce_build_iSeries(struct iommu_table *tbl, long index, long npages, rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, tce); if (rc) - panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%llx\n", rc); index++; uaddr += TCE_PAGE_SIZE; @@ -81,7 +81,7 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) while (npages--) { rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); if (rc) - panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%lx\n", + panic("PCI_DMA: HvCallXm_setTce failed, Rc: 0x%llx\n", rc); index++; } diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index c90817acb47..3ee01b4f425 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -127,10 +127,10 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, } if (rc && printk_ratelimit()) { - printk("tce_build_pSeriesLP: plpar_tce_put failed. 
rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); - printk("\ttce val = 0x%lx\n", tce ); + printk("tce_build_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); + printk("\ttce val = 0x%llx\n", tce ); show_stack(current, (unsigned long *)__get_SP()); } @@ -210,10 +210,10 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, } if (rc && printk_ratelimit()) { - printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\tnpages = 0x%lx\n", (u64)npages); - printk("\ttce[0] val = 0x%lx\n", tcep[0]); + printk("tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); + printk("\ttce[0] val = 0x%llx\n", tcep[0]); show_stack(current, (unsigned long *)__get_SP()); } return ret; @@ -227,9 +227,9 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); if (rc && printk_ratelimit()) { - printk("tce_free_pSeriesLP: plpar_tce_put failed. rc=%ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("tce_free_pSeriesLP: plpar_tce_put failed. 
rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); show_stack(current, (unsigned long *)__get_SP()); } @@ -246,9 +246,9 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n if (rc && printk_ratelimit()) { printk("tce_freemulti_pSeriesLP: plpar_tce_stuff failed\n"); - printk("\trc = %ld\n", rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\tnpages = 0x%lx\n", (u64)npages); + printk("\trc = %lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\tnpages = 0x%llx\n", (u64)npages); show_stack(current, (unsigned long *)__get_SP()); } } @@ -261,10 +261,9 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); if (rc && printk_ratelimit()) { - printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%ld\n", - rc); - printk("\tindex = 0x%lx\n", (u64)tbl->it_index); - printk("\ttcenum = 0x%lx\n", (u64)tcenum); + printk("tce_get_pSeriesLP: plpar_tce_get failed. rc=%lld\n", rc); + printk("\tindex = 0x%llx\n", (u64)tbl->it_index); + printk("\ttcenum = 0x%llx\n", (u64)tcenum); show_stack(current, (unsigned long *)__get_SP()); } diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 3e0d89dcdba..a35297dbac2 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -435,7 +435,7 @@ static void __init mpic_scan_ht_msi(struct mpic *mpic, u8 __iomem *devbase, addr = addr | ((u64)readl(base + HT_MSI_ADDR_HI) << 32); } - printk(KERN_DEBUG "mpic: - HT:%02x.%x %s MSI mapping found @ 0x%lx\n", + printk(KERN_DEBUG "mpic: - HT:%02x.%x %s MSI mapping found @ 0x%llx\n", PCI_SLOT(devfn), PCI_FUNC(devfn), flags & HT_MSI_FLAGS_ENABLE ? 
"enabled" : "disabled", addr); diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c index 5b7a574ce57..d0349e7d73e 100644 --- a/drivers/net/pasemi_mac.c +++ b/drivers/net/pasemi_mac.c @@ -712,7 +712,7 @@ static inline void pasemi_mac_rx_error(const struct pasemi_mac *mac, rcmdsta = read_dma_reg(PAS_DMA_RXINT_RCMDSTA(mac->dma_if)); ccmdsta = read_dma_reg(PAS_DMA_RXCHAN_CCMDSTA(chan->chno)); - printk(KERN_ERR "pasemi_mac: rx error. macrx %016lx, rx status %lx\n", + printk(KERN_ERR "pasemi_mac: rx error. macrx %016llx, rx status %llx\n", macrx, *chan->status); printk(KERN_ERR "pasemi_mac: rcmdsta %08x ccmdsta %08x\n", @@ -730,8 +730,8 @@ static inline void pasemi_mac_tx_error(const struct pasemi_mac *mac, cmdsta = read_dma_reg(PAS_DMA_TXCHAN_TCMDSTA(chan->chno)); - printk(KERN_ERR "pasemi_mac: tx error. mactx 0x%016lx, "\ - "tx status 0x%016lx\n", mactx, *chan->status); + printk(KERN_ERR "pasemi_mac: tx error. mactx 0x%016llx, "\ + "tx status 0x%016llx\n", mactx, *chan->status); printk(KERN_ERR "pasemi_mac: tcmdsta 0x%08x\n", cmdsta); } diff --git a/drivers/pcmcia/electra_cf.c b/drivers/pcmcia/electra_cf.c index a34284b1482..d187ba4c5e0 100644 --- a/drivers/pcmcia/electra_cf.c +++ b/drivers/pcmcia/electra_cf.c @@ -297,7 +297,7 @@ static int __devinit electra_cf_probe(struct of_device *ofdev, goto fail3; } - dev_info(device, "at mem 0x%lx io 0x%lx irq %d\n", + dev_info(device, "at mem 0x%lx io 0x%llx irq %d\n", cf->mem_phys, io.start, cf->irq); cf->active = 1; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 5c541f7850f..74d07d137da 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1061,7 +1061,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) } sdev_printk(KERN_INFO, cmd->device, - "aborting command. lun 0x%lx, tag 0x%lx\n", + "aborting command. 
lun 0x%llx, tag 0x%llx\n", (((u64) lun) << 48), (u64) found_evt); wait_for_completion(&evt->comp); @@ -1082,7 +1082,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) if (rsp_rc) { if (printk_ratelimit()) sdev_printk(KERN_WARNING, cmd->device, - "abort code %d for task tag 0x%lx\n", + "abort code %d for task tag 0x%llx\n", rsp_rc, tsk_mgmt->task_tag); return FAILED; } @@ -1102,12 +1102,12 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd) if (found_evt == NULL) { spin_unlock_irqrestore(hostdata->host->host_lock, flags); - sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%lx completed\n", + sdev_printk(KERN_INFO, cmd->device, "aborted task tag 0x%llx completed\n", tsk_mgmt->task_tag); return SUCCESS; } - sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%lx\n", + sdev_printk(KERN_INFO, cmd->device, "successfully aborted task tag 0x%llx\n", tsk_mgmt->task_tag); cmd->result = (DID_ABORT << 16); @@ -1182,7 +1182,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) return FAILED; } - sdev_printk(KERN_INFO, cmd->device, "resetting device. lun 0x%lx\n", + sdev_printk(KERN_INFO, cmd->device, "resetting device. 
lun 0x%llx\n", (((u64) lun) << 48)); wait_for_completion(&evt->comp); @@ -1203,7 +1203,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd) if (rsp_rc) { if (printk_ratelimit()) sdev_printk(KERN_WARNING, cmd->device, - "reset code %d for task tag 0x%lx\n", + "reset code %d for task tag 0x%llx\n", rsp_rc, tsk_mgmt->task_tag); return FAILED; } -- cgit From 9477e455b4b3ded3e7add8c96d8293105947eaac Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Jan 2009 14:27:38 +0000 Subject: powerpc: Cleanup from l64 to ll64 change: arch code Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/pci_64.c | 6 +++--- arch/powerpc/platforms/cell/spufs/coredump.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/platforms/cell/spufs/file.c | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index 586962f65c2..ea8eda8c87c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -470,7 +470,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) if (bus->self) { pr_debug("IO mapping for PCI-PCI bridge %s\n", pci_name(bus->self)); - pr_debug(" virt=0x%016lx...0x%016lx\n", + pr_debug(" virt=0x%016llx...0x%016llx\n", bus->resource[0]->start + _IO_BASE, bus->resource[0]->end + _IO_BASE); return 0; @@ -502,7 +502,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) hose->io_base_phys - phys_page); pr_debug("IO mapping for PHB %s\n", hose->dn->full_name); - pr_debug(" phys=0x%016lx, virt=0x%p (alloc=0x%p)\n", + pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", hose->io_base_phys, hose->io_base_virt, hose->io_base_alloc); pr_debug(" size=0x%016lx (alloc=0x%016lx)\n", hose->pci_io_size, size_page); @@ -517,7 +517,7 @@ int __devinit pcibios_map_io_space(struct pci_bus *bus) hose->io_resource.start += io_virt_offset; hose->io_resource.end += io_virt_offset; - pr_debug(" 
hose->io_resource=0x%016lx...0x%016lx\n", + pr_debug(" hose->io_resource=0x%016llx...0x%016llx\n", hose->io_resource.start, hose->io_resource.end); return 0; diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index af116aadba1..c4d4a19235e 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -42,7 +42,7 @@ static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, return spufs_coredump_read[num].read(ctx, buffer, size, off); data = spufs_coredump_read[num].get(ctx); - ret = snprintf(buffer, size, "0x%.16lx", data); + ret = snprintf(buffer, size, "0x%.16llx", data); if (ret >= size) return size; return ++ret; /* count trailing NULL */ diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index f093a581ac7..a4dd3ae7223 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -132,7 +132,7 @@ int spufs_handle_class1(struct spu_context *ctx) spuctx_switch_state(ctx, SPU_UTIL_IOWAIT); - pr_debug("ctx %p: ea %016lx, dsisr %016lx state %d\n", ctx, ea, + pr_debug("ctx %p: ea %016llx, dsisr %016llx state %d\n", ctx, ea, dsisr, ctx->state); ctx->stats.hash_flt++; diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 7106b63d401..0da7f2bf5ee 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1654,7 +1654,7 @@ out: static int spufs_check_valid_dma(struct mfc_dma_command *cmd) { - pr_debug("queueing DMA %x %lx %x %x %x\n", cmd->lsa, + pr_debug("queueing DMA %x %llx %x %x %x\n", cmd->lsa, cmd->ea, cmd->size, cmd->tag, cmd->cmd); switch (cmd->cmd) { @@ -1671,7 +1671,7 @@ static int spufs_check_valid_dma(struct mfc_dma_command *cmd) } if ((cmd->lsa & 0xf) != (cmd->ea &0xf)) { - pr_debug("invalid DMA alignment, ea %lx lsa %x\n", + pr_debug("invalid DMA 
alignment, ea %llx lsa %x\n", cmd->ea, cmd->lsa); return -EIO; } @@ -2633,7 +2633,7 @@ static int spufs_show_ctx(struct seq_file *s, void *private) } seq_printf(s, "%c flgs(%lx) sflgs(%lx) pri(%d) ts(%d) spu(%02d)" - " %c %lx %lx %lx %lx %x %x\n", + " %c %llx %llx %llx %llx %x %x\n", ctx->state == SPU_STATE_SAVED ? 'S' : 'R', ctx->flags, ctx->sched_flags, -- cgit From 1901515c79f13d9a1913d246b6c0711318dadb4c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Jan 2009 14:28:48 +0000 Subject: powerpc: Cleanup from l64 to ll64 change: drivers/char This is a powerpc specific driver. Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- drivers/char/bsr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/bsr.c b/drivers/char/bsr.c index 977dfb1096a..f6094ae0ef3 100644 --- a/drivers/char/bsr.c +++ b/drivers/char/bsr.c @@ -103,7 +103,7 @@ static ssize_t bsr_len_show(struct device *dev, struct device_attribute *attr, char *buf) { struct bsr_dev *bsr_dev = dev_get_drvdata(dev); - return sprintf(buf, "%lu\n", bsr_dev->bsr_len); + return sprintf(buf, "%llu\n", bsr_dev->bsr_len); } static struct device_attribute bsr_dev_attrs[] = { -- cgit From a1c5a8932bbb75b550deb156d890027827fc9d6e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Jan 2009 14:40:06 +0000 Subject: powerpc: Cleanup from l64 to ll64 change: drivers/net These are powerpc specific drivers. Signed-off-by: Stephen Rothwell Acked-by: David S. 
Miller Signed-off-by: Benjamin Herrenschmidt --- drivers/net/ehea/ehea_main.c | 8 ++++---- drivers/net/ehea/ehea_qmr.c | 18 +++++++++--------- drivers/net/ibmveth.c | 16 ++++++++-------- drivers/net/iseries_veth.c | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index e3131ea629c..dfe92264e82 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -132,7 +132,7 @@ void ehea_dump(void *adr, int len, char *msg) int x; unsigned char *deb = adr; for (x = 0; x < len; x += 16) { - printk(DRV_NAME " %s adr=%p ofs=%04x %016lx %016lx\n", msg, + printk(DRV_NAME " %s adr=%p ofs=%04x %016llx %016llx\n", msg, deb, x, *((u64 *)&deb[0]), *((u64 *)&deb[8])); deb += 16; } @@ -883,7 +883,7 @@ static irqreturn_t ehea_qp_aff_irq_handler(int irq, void *param) while (eqe) { qp_token = EHEA_BMASK_GET(EHEA_EQE_QP_TOKEN, eqe->entry); - ehea_error("QP aff_err: entry=0x%lx, token=0x%x", + ehea_error("QP aff_err: entry=0x%llx, token=0x%x", eqe->entry, qp_token); qp = port->port_res[qp_token].qp; @@ -1159,7 +1159,7 @@ static void ehea_parse_eqe(struct ehea_adapter *adapter, u64 eqe) netif_stop_queue(port->netdev); break; default: - ehea_error("unknown event code %x, eqe=0x%lX", ec, eqe); + ehea_error("unknown event code %x, eqe=0x%llX", ec, eqe); break; } } @@ -1971,7 +1971,7 @@ static void ehea_set_multicast_list(struct net_device *dev) } if (dev->mc_count > port->adapter->max_mc_mac) { - ehea_info("Mcast registration limit reached (0x%lx). " + ehea_info("Mcast registration limit reached (0x%llx). 
" "Use ALLMULTI!", port->adapter->max_mc_mac); goto out; diff --git a/drivers/net/ehea/ehea_qmr.c b/drivers/net/ehea/ehea_qmr.c index 225c692b5d9..49d766ebbcf 100644 --- a/drivers/net/ehea/ehea_qmr.c +++ b/drivers/net/ehea/ehea_qmr.c @@ -168,7 +168,7 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, cq->fw_handle, rpage, 1); if (hret < H_SUCCESS) { ehea_error("register_rpage_cq failed ehea_cq=%p " - "hret=%lx counter=%i act_pages=%i", + "hret=%llx counter=%i act_pages=%i", cq, hret, counter, cq->attr.nr_pages); goto out_kill_hwq; } @@ -178,13 +178,13 @@ struct ehea_cq *ehea_create_cq(struct ehea_adapter *adapter, if ((hret != H_SUCCESS) || (vpage)) { ehea_error("registration of pages not " - "complete hret=%lx\n", hret); + "complete hret=%llx\n", hret); goto out_kill_hwq; } } else { if (hret != H_PAGE_REGISTERED) { ehea_error("CQ: registration of page failed " - "hret=%lx\n", hret); + "hret=%llx\n", hret); goto out_kill_hwq; } } @@ -986,15 +986,15 @@ void print_error_data(u64 *data) length = EHEA_PAGESIZE; if (type == 0x8) /* Queue Pair */ - ehea_error("QP (resource=%lX) state: AER=0x%lX, AERR=0x%lX, " - "port=%lX", resource, data[6], data[12], data[22]); + ehea_error("QP (resource=%llX) state: AER=0x%llX, AERR=0x%llX, " + "port=%llX", resource, data[6], data[12], data[22]); if (type == 0x4) /* Completion Queue */ - ehea_error("CQ (resource=%lX) state: AER=0x%lX", resource, + ehea_error("CQ (resource=%llX) state: AER=0x%llX", resource, data[6]); if (type == 0x3) /* Event Queue */ - ehea_error("EQ (resource=%lX) state: AER=0x%lX", resource, + ehea_error("EQ (resource=%llX) state: AER=0x%llX", resource, data[6]); ehea_dump(data, length, "error data"); @@ -1016,11 +1016,11 @@ void ehea_error_data(struct ehea_adapter *adapter, u64 res_handle) rblock); if (ret == H_R_STATE) - ehea_error("No error data is available: %lX.", res_handle); + ehea_error("No error data is available: %llX.", res_handle); else if (ret == H_SUCCESS) print_error_data(rblock); else - 
ehea_error("Error data could not be fetched: %lX", res_handle); + ehea_error("Error data could not be fetched: %llX", res_handle); kfree(rblock); } diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index ca3bb9f7321..dfa6348ac1d 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -602,7 +602,7 @@ static int ibmveth_open(struct net_device *netdev) if(lpar_rc != H_SUCCESS) { ibmveth_error_printk("h_register_logical_lan failed with %ld\n", lpar_rc); - ibmveth_error_printk("buffer TCE:0x%lx filter TCE:0x%lx rxq desc:0x%lx MAC:0x%lx\n", + ibmveth_error_printk("buffer TCE:0x%llx filter TCE:0x%llx rxq desc:0x%llx MAC:0x%llx\n", adapter->buffer_list_dma, adapter->filter_list_dma, rxq_desc.desc, @@ -1378,13 +1378,13 @@ static int ibmveth_show(struct seq_file *seq, void *v) seq_printf(seq, "Firmware MAC: %pM\n", firmware_mac); seq_printf(seq, "\nAdapter Statistics:\n"); - seq_printf(seq, " TX: vio_map_single failres: %ld\n", adapter->tx_map_failed); - seq_printf(seq, " send failures: %ld\n", adapter->tx_send_failed); - seq_printf(seq, " RX: replenish task cycles: %ld\n", adapter->replenish_task_cycles); - seq_printf(seq, " alloc_skb_failures: %ld\n", adapter->replenish_no_mem); - seq_printf(seq, " add buffer failures: %ld\n", adapter->replenish_add_buff_failure); - seq_printf(seq, " invalid buffers: %ld\n", adapter->rx_invalid_buffer); - seq_printf(seq, " no buffers: %ld\n", adapter->rx_no_buffer); + seq_printf(seq, " TX: vio_map_single failres: %lld\n", adapter->tx_map_failed); + seq_printf(seq, " send failures: %lld\n", adapter->tx_send_failed); + seq_printf(seq, " RX: replenish task cycles: %lld\n", adapter->replenish_task_cycles); + seq_printf(seq, " alloc_skb_failures: %lld\n", adapter->replenish_no_mem); + seq_printf(seq, " add buffer failures: %lld\n", adapter->replenish_add_buff_failure); + seq_printf(seq, " invalid buffers: %lld\n", adapter->rx_invalid_buffer); + seq_printf(seq, " no buffers: %lld\n", adapter->rx_no_buffer); return 0; } diff 
--git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index c7457f97259..cb793c2bade 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -429,7 +429,7 @@ SIMPLE_PORT_ATTR(promiscuous); SIMPLE_PORT_ATTR(num_mcast); CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map); CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map); -CUSTOM_PORT_ATTR(mac_addr, "0x%lX\n", port->mac_addr); +CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr); #define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr) static struct attribute *veth_port_default_attrs[] = { -- cgit From 9fef3d2d15ae8ca24e4a145f2e189eea145d18c2 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 8 Jan 2009 02:14:18 +0000 Subject: hvc_console: Change an mb() to smp_mb() and add some comments I remember some history on this barrier. There was a race between open via /dev/console and the tty being fully set up. It's also why there is a temporary variable and the global is assigned at the end of the function. Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_console.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 5a8a4c28c86..f2685b747fb 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -876,8 +876,11 @@ static int hvc_init(void) goto stop_thread; } - /* FIXME: This mb() seems completely random. Remove it. */ - mb(); + /* + * Make sure tty is fully registered before allowing it to be + * found by hvc_console_device. + */ + smp_mb(); hvc_driver = drv; return 0; -- cgit From c21f7a527f7757a0e246cea521a5dd3b8e1224d5 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 8 Jan 2009 02:14:21 +0000 Subject: hvc_console: Call free_irq() only if request_irq() was successful Only call free_irq if we marked the request_irq as having succeeded instead of whenever the sub-driver identified the interrupt to use.
Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/hvc_irq.c b/drivers/char/hvc_irq.c index d09e5688d44..2623e177e8d 100644 --- a/drivers/char/hvc_irq.c +++ b/drivers/char/hvc_irq.c @@ -37,7 +37,7 @@ int notifier_add_irq(struct hvc_struct *hp, int irq) void notifier_del_irq(struct hvc_struct *hp, int irq) { - if (!irq) + if (!hp->irq_requested) return; free_irq(irq, hp); hp->irq_requested = 0; -- cgit From da9dc13289fa58dced12f2baff51dfb87c339ba3 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 8 Jan 2009 02:14:24 +0000 Subject: hvc_console: Do not set low_latency when using interrupts hvc_console is setting low_latency unconditionally, but some clients are interrupt driven and will call hvc_poll from irq context. This will cause tty_flip_buffer_push to be called from irq context, and it very clearly states it must not be called from IRQ when low_latency is specified. Looking back through history: v2.6.16-rc1 via 33f0f88f1c51ae5c2d593d26960c760ea154c2e2 [PATCH] TTY layer buffering revamp added this new api. v2.6.16-rc3 via 8977d929e49021d9a6e031310aab01fa72f849c2 [PATCH] tty buffering stall fix claims to fix a stall discovered with hvc_console v2.6.16-rc5 via fb5c594c2acc441f0d2d8f457484a0e0e9285db3 [PATCH] Fix race condition in hvc console. said set this flag to avoid a stall problem, and was merged through the powerpc arch tree. Without searching for email discussions, it would appear to be an overlapping "fix", but one that did not consider all users. 
Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_console.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index f2685b747fb..0e18fdb9ddd 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -318,7 +318,8 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) } /* else count == 0 */ tty->driver_data = hp; - tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ + if (!hp->irq_requested) + tty->low_latency = 1; /* Makes flushes to ldisc synchronous. */ hp->tty = tty; -- cgit From 2da7582f7cf5ef5e6adcf42537b6b8be06312152 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Thu, 8 Jan 2009 02:14:28 +0000 Subject: hvc_console: Use kzalloc() instead of kmalloc() + memset() Replace kmalloc() + memset() with kzalloc(). Signed-off-by: Milton Miller Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_console.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 0e18fdb9ddd..09676b4e5d8 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -765,13 +765,11 @@ struct hvc_struct __devinit *hvc_alloc(uint32_t vtermno, int data, return ERR_PTR(err); } - hp = kmalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size, + hp = kzalloc(ALIGN(sizeof(*hp), sizeof(long)) + outbuf_size, GFP_KERNEL); if (!hp) return ERR_PTR(-ENOMEM); - memset(hp, 0x00, sizeof(*hp)); - hp->vtermno = vtermno; hp->data = data; hp->ops = ops; -- cgit From fc7a9feb9c9df50ed6d115514b48c49e8511a4de Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 9 Jan 2009 13:12:44 +0000 Subject: powerpc/cacheinfo: Rename cache_dir per-cpu variable The per_cpu__ prefix on DECLARE_PER_CPU'd variables is going away; rename cache_dir to cache_dir_pcpu. 
Signed-off-by: Nathan Lynch Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cacheinfo.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index b33f0417a4b..bb37b1d19a5 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -113,7 +113,7 @@ struct cache { struct cache *next_local; /* next cache of >= level */ }; -static DEFINE_PER_CPU(struct cache_dir *, cache_dir); +static DEFINE_PER_CPU(struct cache_dir *, cache_dir_pcpu); /* traversal/modification of this list occurs only at cpu hotplug time; * access is serialized by cpu hotplug locking @@ -468,9 +468,9 @@ static struct cache_dir *__cpuinit cacheinfo_create_cache_dir(unsigned int cpu_i cache_dir->kobj = kobj; - WARN_ON_ONCE(per_cpu(cache_dir, cpu_id) != NULL); + WARN_ON_ONCE(per_cpu(cache_dir_pcpu, cpu_id) != NULL); - per_cpu(cache_dir, cpu_id) = cache_dir; + per_cpu(cache_dir_pcpu, cpu_id) = cache_dir; return cache_dir; err: @@ -820,13 +820,13 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) /* Prevent userspace from seeing inconsistent state - remove * the sysfs hierarchy first */ - cache_dir = per_cpu(cache_dir, cpu_id); + cache_dir = per_cpu(cache_dir_pcpu, cpu_id); /* careful, sysfs population may have failed */ if (cache_dir) remove_cache_dir(cache_dir); - per_cpu(cache_dir, cpu_id) = NULL; + per_cpu(cache_dir_pcpu, cpu_id) = NULL; /* clear the CPU's bit in its cache chain, possibly freeing * cache objects */ -- cgit From c478b58135e6c9b49c8b80bff8ef910f2ba9b313 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Sun, 11 Jan 2009 19:03:45 +0000 Subject: powerpc/powermac: Fix occasional SMP boot failure The PowerMac kernel occasionally fails to bring up the secondary CPUs on SMP, the trigger factor seem to be fairly random and related to location of code and data. 
This appears to be due to the initial loading of the TOC value by the secondary processor which now happens before we clear HID4:RM_CI (Real Mode Cache Invalidate). This bit should really be cleared before we do any load or store other than fetching code. This fix works based on the assumption that all SMP 64-bit PowerMacs use variants of the 970, which fortunately is true, by explicitly clearing that bit, adding an slbia for good measure as RM_CI mode is known to create bogus ERAT entries. I also removed some spurious debug output that was left enabled by mistake while at it. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/head_64.S | 9 +++++++++ arch/powerpc/platforms/powermac/smp.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index b4bcf5a930f..ebaedafc8e6 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1518,6 +1518,15 @@ _GLOBAL(pmac_secondary_start) /* turn on 64-bit mode */ bl .enable_64b_mode + li r0,0 + mfspr r3,SPRN_HID4 + rldimi r3,r0,40,23 /* clear bit 23 (rm_ci) */ + sync + mtspr SPRN_HID4,r3 + isync + sync + slbia + /* get TOC pointer (real address) */ bl .relative_toc diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index 6b0711c15ec..bd8817b00fa 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -53,7 +53,7 @@ #include #include -#define DEBUG +#undef DEBUG #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) -- cgit From 061b908cf888c1bfd3dd1b91ce6f676e3bc42633 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Jan 2009 16:30:00 +0000 Subject: powerpc: Enable PS3 options and QPACE in ppc64_defconfig To increase the amount of code that's built for a defconfig build.
Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/configs/ppc64_defconfig | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 069ae1bbac2..d4685d1c0be 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -211,11 +211,28 @@ CONFIG_PPC_PASEMI=y CONFIG_PPC_PASEMI_IOMMU=y # CONFIG_PPC_PASEMI_IOMMU_DMA_FORCE is not set CONFIG_PPC_PASEMI_MDIO=y -# CONFIG_PPC_PS3 is not set +CONFIG_PPC_PS3=y + +# +# PS3 Platform Options +# +# CONFIG_PS3_ADVANCED is not set +CONFIG_PS3_HTAB_SIZE=20 +# CONFIG_PS3_DYNAMIC_DMA is not set +CONFIG_PS3_VUART=y +CONFIG_PS3_PS3AV=y +CONFIG_PS3_SYS_MANAGER=y +CONFIG_PS3_STORAGE=m +CONFIG_PS3_DISK=m +CONFIG_PS3_ROM=m +CONFIG_PS3_FLASH=m +CONFIG_PS3_LPM=m CONFIG_PPC_CELL=y +CONFIG_PPC_CELL_COMMON=y CONFIG_PPC_CELL_NATIVE=y CONFIG_PPC_IBM_CELL_BLADE=y CONFIG_PPC_CELLEB=y +CONFIG_PPC_CELL_QPACE=y # # Cell Broadband Engine options @@ -981,6 +998,9 @@ CONFIG_E1000=y CONFIG_TIGON3=y # CONFIG_BNX2 is not set CONFIG_SPIDER_NET=m +CONFIG_GELIC_NET=m +CONFIG_GELIC_WIRELESS=y +# CONFIG_GELIC_WIRELESS_OLD_PSK_INTERFACE is not set # CONFIG_QLA3XXX is not set # CONFIG_ATL1 is not set # CONFIG_ATL1E is not set @@ -1370,6 +1390,8 @@ CONFIG_FB_RADEON_BACKLIGHT=y # CONFIG_FB_PM3 is not set # CONFIG_FB_CARMINE is not set CONFIG_FB_IBM_GXT4500=y +CONFIG_FB_PS3=m +CONFIG_FB_PS3_DEFAULT_SIZE_M=9 # CONFIG_FB_VIRTUAL is not set # CONFIG_FB_METRONOME is not set CONFIG_BACKLIGHT_LCD_SUPPORT=y @@ -1492,6 +1514,8 @@ CONFIG_SND_PCI=y CONFIG_SND_PPC=y CONFIG_SND_POWERMAC=m CONFIG_SND_POWERMAC_AUTO_DRC=y +CONFIG_SND_PS3=m +CONFIG_SND_PS3_DEFAULT_START_DELAY=2000 CONFIG_SND_AOA=m CONFIG_SND_AOA_FABRIC_LAYOUT=m CONFIG_SND_AOA_ONYX=m -- cgit From d9736749f581abd80c2831244e2659e2e833b0e3 Mon Sep 17 00:00:00 2001 From: Krzysztof Hałasa Date: Mon, 12 Jan 2009 16:31:54 -0800 Subject:
WAN: Fix NAPI interface in IXP4xx HSS driver. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Krzysztof Hałasa Signed-off-by: David S. Miller --- drivers/net/wan/ixp4xx_hss.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c index 2dc241689d3..0dbd85b0162 100644 --- a/drivers/net/wan/ixp4xx_hss.c +++ b/drivers/net/wan/ixp4xx_hss.c @@ -622,7 +622,7 @@ static void hss_hdlc_rx_irq(void *pdev) printk(KERN_DEBUG "%s: hss_hdlc_rx_irq\n", dev->name); #endif qmgr_disable_irq(queue_ids[port->id].rx); - netif_rx_schedule(dev, &port->napi); + netif_rx_schedule(&port->napi); } static int hss_hdlc_poll(struct napi_struct *napi, int budget) @@ -651,7 +651,7 @@ static int hss_hdlc_poll(struct napi_struct *napi, int budget) printk(KERN_DEBUG "%s: hss_hdlc_poll" " netif_rx_complete\n", dev->name); #endif - netif_rx_complete(dev, napi); + netif_rx_complete(napi); qmgr_enable_irq(rxq); if (!qmgr_stat_empty(rxq) && netif_rx_reschedule(napi)) { @@ -1069,7 +1069,7 @@ static int hss_hdlc_open(struct net_device *dev) hss_start_hdlc(port); /* we may already have RX data, enables IRQ */ - netif_rx_schedule(dev, &port->napi); + netif_rx_schedule(&port->napi); return 0; err_unlock: -- cgit From 985ebdb5ed54151eba734aa1b307460e8e4267ba Mon Sep 17 00:00:00 2001 From: Krzysztof Hałasa Date: Mon, 12 Jan 2009 16:32:13 -0800 Subject: net: Fix a comment in include/linux/netdevice.h. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix a comment in include/linux/netdevice.h. Signed-off-by: Krzysztof Hałasa Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f2455681337..4647604c7ca 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,7 +467,7 @@ struct netdev_queue { * This function is called when network device transistions to the down * state. * - * int (*ndo_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + * int (*ndo_start_xmit)(struct sk_buff *skb, struct net_device *dev); * Called when a packet needs to be transmitted. * Must return NETDEV_TX_OK , NETDEV_TX_BUSY, or NETDEV_TX_LOCKED, * Required can not be NULL. -- cgit From 88843104a19d5896bf67ab6bd685e976240dd04a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:00 +0000 Subject: netfilter 01/09: remove "happy cracking" message Don't spam logs for locally generated short packets. these can only be generated by root. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/iptable_filter.c | 7 +------ net/ipv4/netfilter/iptable_mangle.c | 6 +----- net/ipv4/netfilter/iptable_raw.c | 6 +----- net/ipv4/netfilter/iptable_security.c | 6 +----- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 5 +---- 5 files changed, 5 insertions(+), 25 deletions(-) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index c9224310eba..52cb6939d09 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -93,13 +93,8 @@ ipt_local_out_hook(unsigned int hook, { /* root is playing with raw sockets. 
*/ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_filter: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } - return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_filter); } diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 69f2c428714..3929d20b9e4 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -132,12 +132,8 @@ ipt_local_hook(unsigned int hook, /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_mangle: ignoring short SOCK_RAW " - "packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } /* Save things which could affect route */ mark = skb->mark; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 8faebfe638f..7f65d18333e 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -65,12 +65,8 @@ ipt_local_hook(unsigned int hook, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("iptable_raw: ignoring short SOCK_RAW " - "packet.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_raw); } diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index 36f3be3cc42..a52a35f4a58 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -96,12 +96,8 @@ ipt_local_out_hook(unsigned int hook, { /* Somebody is playing with raw sockets. 
*/ if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk(KERN_INFO "iptable_security: ignoring short " - "SOCK_RAW packet.\n"); + || ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return ipt_do_table(skb, hook, in, out, dev_net(out)->ipv4.iptable_security); } diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index b2141e11575..4beb04fac58 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -145,11 +145,8 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } -- cgit From a2bd40ad3151d4d346fd167e01fb84b06f7247fc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 12 Jan 2009 00:06:02 +0000 Subject: netfilter 02/09: bridge: Fix handling of non-IP packets in FORWARD/POST_ROUTING Currently the bridge FORWARD/POST_ROUTING chains treat all non-IPv4 packets as IPv6. This patch fixes that by returning NF_ACCEPT on non-IP packets instead, just as is done in PRE_ROUTING. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- net/bridge/br_netfilter.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index a65e43a17fb..9a1cd757ec4 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -686,8 +686,11 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; nf_bridge_pull_encap_header(skb); @@ -828,8 +831,11 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb) || IS_PPPOE_IP(skb)) pf = PF_INET; - else + else if (skb->protocol == htons(ETH_P_IPV6) || IS_VLAN_IPV6(skb) || + IS_PPPOE_IPV6(skb)) pf = PF_INET6; + else + return NF_ACCEPT; #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { -- cgit From 47e0e1ca13d64eeeb687995fbe4e239e743d7544 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 12 Jan 2009 00:06:03 +0000 Subject: netfilter 03/09: bridge: Disable PPPOE/VLAN processing by default The PPPOE/VLAN processing code in the bridge netfilter is broken by design. The VLAN tag and the PPPOE session ID are an integral part of the packet flow information, yet they're completely ignored by the bridge netfilter. This is potentially a security hole as it treats all VLANs and PPPOE sessions as the same. What's more, it's actually broken for PPPOE as the bridge netfilter tries to trim the packets to the IP length without adjusting the PPPOE header (and adjusting the PPPOE header isn't much better since the PPPOE peer may require the padding to be present). Therefore we should disable this by default. It does mean that people relying on this feature may lose networking depending on how their bridge netfilter rules are configured. 
However, IMHO the problems this code causes are serious enough to warrant this. Signed-off-by: Herbert Xu Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netfilter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 9a1cd757ec4..cf754ace0b7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -58,11 +58,11 @@ static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables __read_mostly = 1; static int brnf_call_ip6tables __read_mostly = 1; static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly = 1; -static int brnf_filter_pppoe_tagged __read_mostly = 1; +static int brnf_filter_vlan_tagged __read_mostly = 0; +static int brnf_filter_pppoe_tagged __read_mostly = 0; #else -#define brnf_filter_vlan_tagged 1 -#define brnf_filter_pppoe_tagged 1 +#define brnf_filter_vlan_tagged 0 +#define brnf_filter_pppoe_tagged 0 #endif static inline __be16 vlan_proto(const struct sk_buff *skb) -- cgit From 656caff20e12ba6e07b4bf342641df5ab33b4e49 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:04 +0000 Subject: netfilter 04/09: x_tables: fix match/target revision lookup Commit 55b69e91 (netfilter: implement NFPROTO_UNSPEC as a wildcard for extensions) broke revision probing for matches and targets that are registered with NFPROTO_UNSPEC. Fix by continuing the search on the NFPROTO_UNSPEC list if nothing is found on the af-specific lists. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/x_tables.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 89837a4eef7..bfbf521f6ea 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -273,6 +273,10 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } @@ -289,6 +293,10 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + + if (af != NFPROTO_UNSPEC && !have_rev) + return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); + return have_rev; } -- cgit From d61ba9fd55b52a10b8e0ffd39bbc33587d3bfc8d Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 12 Jan 2009 00:06:06 +0000 Subject: netfilter 05/09: ebtables: fix inversion in match code Commit 8cc784ee (netfilter: change return types of match functions for ebtables extensions) broke ebtables matches by inverting the sense of match/nomatch. Reported-by: Matt Cross Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebtables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8a8743d7d6e..820252aee81 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -79,7 +79,7 @@ static inline int ebt_do_match (struct ebt_entry_match *m, { par->match = m->u.match; par->matchinfo = m->data; - return m->u.match->match(skb, par); + return m->u.match->match(skb, par) ? 
EBT_MATCH : EBT_NOMATCH; } static inline int ebt_dev_check(char *entry, const struct net_device *device) -- cgit From 71320afcdb33b3f0b754ba1fac6a8c77aa469041 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 12 Jan 2009 00:06:07 +0000 Subject: netfilter 06/09: nf_conntrack: fix ICMP/ICMPv6 timeout sysctls on big-endian An old bug crept back into the ICMP/ICMPv6 conntrack protocols: the timeout values are defined as unsigned longs, the sysctl's maxsize is set to sizeof(unsigned int). Use unsigned int for the timeout values as in the other conntrack protocols. Reported-by: Jean-Mickael Guerin Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 2 +- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 1fd3ef7718b..2a8bee26f43 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -20,7 +20,7 @@ #include #include -static unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index bd52151d31e..c455cf4ee75 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -26,7 +26,7 @@ #include #include -static unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ; static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, -- cgit From cd7fcbf1cb6933bfb9171452b4a370c92923544d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 12 Jan 2009 00:06:08 +0000 Subject: 
netfilter 07/09: simplify nf_conntrack_alloc() error handling nf_conntrack_alloc cannot return NULL, so there is no need to check for NULL before using the value. I have also removed the initialization of ct to NULL in nf_conntrack_alloc, since the value is never used, and since perhaps it might lead one to think that return ct at the end might return NULL. The semantic patch that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @match exists@ expression x, E; position p1,p2; statement S1, S2; @@ x@p1 = nf_conntrack_alloc(...) ... when != x = E ( if (x@p2 == NULL || ...) S1 else S2 | if (x@p2 == NULL && ...) S1 else S2 ) @other_match exists@ expression match.x, E1, E2; position p1!=match.p1,match.p2; @@ x@p1 = E1 ... when != x = E2 x@p2 @ script:python depends on !other_match@ p1 << match.p1; p2 << match.p2; @@ print "%s: call to nf_conntrack_alloc %s bad test %s" % (p1[0].file,p1[0].line,p2[0].line) // Signed-off-by: Julia Lawall Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/nf_conntrack_core.c | 4 ++-- net/netfilter/nf_conntrack_netlink.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7e83f74cd5d..90ce9ddb945 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -469,7 +469,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, const struct nf_conntrack_tuple *repl, gfp_t gfp) { - struct nf_conn *ct = NULL; + struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd_initted)) { get_random_bytes(&nf_conntrack_hash_rnd, 4); @@ -551,7 +551,7 @@ init_conntrack(struct net *net, } ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) { + if (IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 00e8c27130f..3dddec6d2f7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1134,7 +1134,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_helper *helper; ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); - if (ct == NULL || IS_ERR(ct)) + if (IS_ERR(ct)) return -ENOMEM; if (!cda[CTA_TIMEOUT]) -- cgit From e6210f3be5b13b6cda9c8dad8926818a73c8e6ac Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 12 Jan 2009 00:06:10 +0000 Subject: netfilter 08/09: xt_time: print timezone for user information netfilter: xt_time: print timezone for user information Let users have a way to figure out if their distro set the kernel timezone at all. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/netfilter/xt_time.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 29375ba8db7..93acaa59d10 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -243,6 +243,17 @@ static struct xt_match xt_time_mt_reg __read_mostly = { static int __init time_mt_init(void) { + int minutes = sys_tz.tz_minuteswest; + + if (minutes < 0) /* east of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is +%02d%02d\n", + -minutes / 60, -minutes % 60); + else /* west of Greenwich */ + printk(KERN_INFO KBUILD_MODNAME + ": kernel timezone is -%02d%02d\n", + minutes / 60, minutes % 60); + return xt_register_match(&xt_time_mt_reg); } -- cgit From daaf83d2b9277928739f3eb7ea64f49c1254fd62 Mon Sep 17 00:00:00 2001 From: Richard Kennedy Date: Mon, 12 Jan 2009 00:06:11 +0000 Subject: netfilter 09/09: remove padding from struct xt_match on 64bit builds reorder struct xt_match to remove 8 bytes of padding and make its size 128 bytes. This saves a small amount of data space in each of the xt netfilter modules and fits xt_match in one 128 byte cache line. Signed-off-by: Richard Kennedy Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e52ce475d19..c7ee8744d26 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -270,6 +270,7 @@ struct xt_match struct list_head list; const char name[XT_FUNCTION_MAXNAMELEN-1]; + u_int8_t revision; /* Return true or false: return FALSE and set *hotdrop = 1 to force immediate packet drop. */ @@ -302,7 +303,6 @@ struct xt_match unsigned short proto; unsigned short family; - u_int8_t revision; }; /* Registration hooks for targets. 
*/ -- cgit From c08513471911cf33cb50249a7ff12848374f7263 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 12 Jan 2009 21:54:16 -0800 Subject: pkt_sched: sch_htb: Consider used jiffies in htb_do_events() Next event time should consider jiffies used for recounting. Otherwise qdisc_watchdog_schedule() triggers hrtimer immediately with the event in the past, and may cause very high ksoftirqd cpu usage (if highres is on). There is also removed checking "event" for zero in htb_dequeue(): it's always true in this place. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5070643ce53..9ca8a26ba50 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -685,8 +685,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level) if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - /* too much load - let's continue on next jiffie */ - return q->now + PSCHED_TICKS_PER_SEC / HZ; + /* too much load - let's continue on next jiffie (including above) */ + return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -873,7 +873,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } else event = q->near_ev_cache[level]; - if (event && next_event > event) + if (next_event > event) next_event = event; m = ~q->row_mask[level]; -- cgit From a73be040650463eacb95f83d2e6673ac57b4fc59 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Mon, 12 Jan 2009 21:54:40 -0800 Subject: pkt_sched: sch_htb: Break all htb_do_events() after 2 jiffies Currently htb_do_events() breaks events recounting for a level after 2 jiffies, but there is no reason to repeat this for next levels and increase delays even more (with softirqs disabled). htb_dequeue_tree() can add to this too, btw. In such a case q->now time is invalid anyway. 
Thanks to Patrick McHardy for spotting an error around earlier version of this patch. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 9ca8a26ba50..2f0f0b04d3f 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -661,12 +661,13 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Applied are events whose have cl->pq_key <= q->now. */ -static psched_time_t htb_do_events(struct htb_sched *q, int level) +static psched_time_t htb_do_events(struct htb_sched *q, int level, + unsigned long start) { /* don't run for longer than 2 jiffies; 2 is used instead of 1 to simplify things when jiffy is going to be incremented too soon */ - unsigned long stop_at = jiffies + 2; + unsigned long stop_at = start + 2; while (time_before(jiffies, stop_at)) { struct htb_class *cl; long diff; @@ -845,6 +846,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; psched_time_t next_event; + unsigned long start_at; /* try to dequeue direct packets as high prio (!) 
to minimize cpu work */ skb = __skb_dequeue(&q->direct_queue); @@ -857,6 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) if (!sch->q.qlen) goto fin; q->now = psched_get_time(); + start_at = jiffies; next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; @@ -866,7 +869,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) psched_time_t event; if (q->now >= q->near_ev_cache[level]) { - event = htb_do_events(q, level); + event = htb_do_events(q, level, start_at); if (!event) event = q->now + PSCHED_TICKS_PER_SEC; q->near_ev_cache[level] = event; -- cgit From b74f62c1e736ea01c660355526dd54132d241ca9 Mon Sep 17 00:00:00 2001 From: Denis Joseph Barrow Date: Mon, 12 Jan 2009 21:56:49 -0800 Subject: hso: driver fix for big endian machines. Filip Aben says this fix is necessary for big endian machines. Signed-off-by: Denis Joseph Barrow Signed-off-by: David S. Miller --- drivers/net/usb/hso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index c4918b86ed1..d17dc5214c9 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1792,8 +1792,8 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, /* initialize */ ctrl_req->wValue = 0; - ctrl_req->wIndex = hso_port_to_mux(port); - ctrl_req->wLength = size; + ctrl_req->wIndex = cpu_to_le16(hso_port_to_mux(port)); + ctrl_req->wLength = cpu_to_le16(size); if (type == USB_CDC_GET_ENCAPSULATED_RESPONSE) { /* Reading command */ -- cgit From a6d0b91ae5dd01263530c96f9b29001cb1ed58b0 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 12 Jan 2009 21:57:34 -0800 Subject: gianfar: Fix soft lockup with multi-interrupt TSECs This patch fixes the following bug: BUG: soft lockup - CPU#0 stuck for 61s!
[S03mountvirtfs-:922] Modules linked in: NIP: c006505c LR: c00675f0 CTR: c0020438 REGS: c7a1db90 TRAP: 0901 Not tainted (2.6.28-rc8-01311-g8c7396a) MSR: 00009032 CR: 28248442 XER: 20000000 TASK = c7a288a0[922] 'S03mountvirtfs-' THREAD: c7a1c000 GPR00: 00009032 c7a1dc40 c7a288a0 00000024 c79a1840 00000000 00000300 00000020 GPR08: c035f97c 00000000 00004008 c04d5210 00000000 NIP [c006505c] handle_IRQ_event+0x34/0xb0 LR [c00675f0] handle_level_irq+0xa8/0x144 Call Trace: [c7a1dc40] [c00204d8] ipic_mask_irq+0xa0/0xb4 (unreliable) [c7a1dc60] [c00675f0] handle_level_irq+0xa8/0x144 [c7a1dc80] [c00067f8] do_IRQ+0x78/0x108 [c7a1dc90] [c0014d7c] ret_from_except+0x0/0x14 --- Exception: 501 at gfar_schedule_cleanup+0x54/0x7c LR = gfar_transmit+0x14/0x28 [c7a1dd50] [c0352a3c] _spin_unlock_irqrestore+0x18/0x30 (unreliable) [c7a1dd60] [c01f49a8] gfar_transmit+0x14/0x28 [c7a1dd70] [c0065084] handle_IRQ_event+0x5c/0xb0 [c7a1dd90] [c00675f0] handle_level_irq+0xa8/0x144 [c7a1ddb0] [c00067f8] do_IRQ+0x78/0x108 [c7a1ddc0] [c0014d7c] ret_from_except+0x0/0x14 --- Exception: 501 at up_read+0x10/0x48 LR = do_page_fault+0x2b0/0x3e0 [c7a1de80] [c7a177e8] 0xc7a177e8 (unreliable) [c7a1de90] [c0017964] do_page_fault+0x2b0/0x3e0 [c7a1df40] [c0014b14] handle_page_fault+0xc/0x80 --- Exception: 301 at 0xfe98b7c LR = 0xfe989c0 Instruction dump: 7c0802a6 bf810010 7c9f2378 7c7c1b78 90010024 80040004 70090020 40820010 7c0000a6 60008000 7c000124 3bc00000 <3ba00000> 48000010 83ff0014 2f9f0000 The bug introduced by commit 8c7396aebb68994c0519e438eecdf4d5fa9c7844 ("gianfar: Merge Tx and Rx interrupt for scheduling clean up ring"). The commit merged TX and RX interrupt code into a single routine that schedules NAPI, but no locks were introduced. This causes irq races, so when irqs are enabled and netif_rx_schedule_prep() returns 0, nobody disable the interrupts again. This leads to interrupt storm and finally to the lockup. Signed-off-by: Anton Vorontsov Signed-off-by: David S. 
Miller --- drivers/net/gianfar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index efcbeb6c867..ea530673236 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -1622,10 +1622,18 @@ static int gfar_clean_tx_ring(struct net_device *dev) static void gfar_schedule_cleanup(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); + unsigned long flags; + + spin_lock_irqsave(&priv->txlock, flags); + spin_lock(&priv->rxlock); + if (netif_rx_schedule_prep(&priv->napi)) { gfar_write(&priv->regs->imask, IMASK_RTX_DISABLED); __netif_rx_schedule(&priv->napi); } + + spin_unlock(&priv->rxlock); + spin_unlock_irqrestore(&priv->txlock, flags); } /* Interrupt Handler for Transmit complete */ -- cgit From 859975764fa61e927e7a69f46a55a4ba415785dd Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 12 Jan 2009 22:11:56 -0800 Subject: net: ppp_generic - fix regressions caused by IDR conversion The commits: 7a95d267fb62cd6b80ef73be0592bbbe1dbd5df7 ("net: ppp_generic - use idr technique instead of cardmaps") ab5024ab23b78c86a0a1425defcdde48710fe449 ("net: ppp_generic - use DEFINE_IDR for static initialization") introduced usage of IDR functionality but broke userspace side. Before this commits it was possible to allocate new ppp interface with specified number. Now it fails with EINVAL. Fix it by trying to allocate interface with specified unit number and return EEXIST if fail which allow pppd to ask us to allocate new unit number. And fix messages on memory allocation fails - add details that it's PPP module who is complaining. Signed-off-by: Cyrill Gorcunov Signed-off-by: David S. 
Miller --- drivers/net/ppp_generic.c | 43 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 06b448285eb..7b2728b8f1b 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -250,6 +250,7 @@ static int ppp_connect_channel(struct channel *pch, int unit); static int ppp_disconnect_channel(struct channel *pch); static void ppp_destroy_channel(struct channel *pch); static int unit_get(struct idr *p, void *ptr); +static int unit_set(struct idr *p, void *ptr, int n); static void unit_put(struct idr *p, int n); static void *unit_find(struct idr *p, int n); @@ -2432,11 +2433,18 @@ ppp_create_interface(int unit, int *retp) } else { if (unit_find(&ppp_units_idr, unit)) goto out2; /* unit already exists */ - else { - /* darn, someone is cheating us? */ - *retp = -EINVAL; + /* + * if caller need a specified unit number + * lets try to satisfy him, otherwise -- + * he should better ask us for new unit number + * + * NOTE: yes I know that returning EEXIST it's not + * fair but at least pppd will ask us to allocate + * new unit in this case so user is happy :) + */ + unit = unit_set(&ppp_units_idr, ppp, unit); + if (unit < 0) goto out2; - } } /* Initialize the new ppp unit */ @@ -2677,14 +2685,37 @@ static void __exit ppp_cleanup(void) * by holding all_ppp_mutex */ +/* associate pointer with specified number */ +static int unit_set(struct idr *p, void *ptr, int n) +{ + int unit, err; + +again: + if (!idr_pre_get(p, GFP_KERNEL)) { + printk(KERN_ERR "PPP: No free memory for idr\n"); + return -ENOMEM; + } + + err = idr_get_new_above(p, ptr, n, &unit); + if (err == -EAGAIN) + goto again; + + if (unit != n) { + idr_remove(p, unit); + return -EINVAL; + } + + return unit; +} + /* get new free unit number and associate pointer with it */ static int unit_get(struct idr *p, void *ptr) { int unit, err; again: - if (idr_pre_get(p, GFP_KERNEL) == 0) { - 
printk(KERN_ERR "Out of memory expanding drawable idr\n"); + if (!idr_pre_get(p, GFP_KERNEL)) { + printk(KERN_ERR "PPP: No free memory for idr\n"); return -ENOMEM; } -- cgit From f9d088b2080b476c86f8ddbc274851b89668c6d7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jan 2009 11:54:49 +0100 Subject: ALSA: hda - Fix a typo Fix a typo in stac92hd83xxx_cfg_tbl[]. The actual number is identical thus there is no behavior change. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 35b83dc6e19..a4e2d8fcc8b 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1682,7 +1682,7 @@ static const char *stac92hd83xxx_models[STAC_92HD83XXX_MODELS] = { static struct snd_pci_quirk stac92hd83xxx_cfg_tbl[] = { /* SigmaTel reference board */ SND_PCI_QUIRK(PCI_VENDOR_ID_INTEL, 0x2668, - "DFI LanParty", STAC_92HD71BXX_REF), + "DFI LanParty", STAC_92HD83XXX_REF), {} /* terminator */ }; -- cgit From f84e3e915ea03dfa6e32626fc25a4f284ef222ac Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jan 2009 12:32:21 +0100 Subject: ALSA: hda - Add support of NVidia MCP78 HDMI Added the new id for NVidia MCP HDMI (10de:0007). 
Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_nvhdmi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_nvhdmi.c b/sound/pci/hda/patch_nvhdmi.c index 96952a37d88..d57d8132a06 100644 --- a/sound/pci/hda/patch_nvhdmi.c +++ b/sound/pci/hda/patch_nvhdmi.c @@ -160,6 +160,7 @@ static int patch_nvhdmi(struct hda_codec *codec) */ static struct hda_codec_preset snd_hda_preset_nvhdmi[] = { { .id = 0x10de0002, .name = "MCP78 HDMI", .patch = patch_nvhdmi }, + { .id = 0x10de0006, .name = "MCP78 HDMI", .patch = patch_nvhdmi }, { .id = 0x10de0007, .name = "MCP7A HDMI", .patch = patch_nvhdmi }, { .id = 0x10de0067, .name = "MCP67 HDMI", .patch = patch_nvhdmi }, { .id = 0x10de8001, .name = "MCP73 HDMI", .patch = patch_nvhdmi }, @@ -167,6 +168,7 @@ static struct hda_codec_preset snd_hda_preset_nvhdmi[] = { }; MODULE_ALIAS("snd-hda-codec-id:10de0002"); +MODULE_ALIAS("snd-hda-codec-id:10de0006"); MODULE_ALIAS("snd-hda-codec-id:10de0007"); MODULE_ALIAS("snd-hda-codec-id:10de0067"); MODULE_ALIAS("snd-hda-codec-id:10de8001"); -- cgit From dafb70ce1026d4d6ef1b16ad6996c9589bb11cce Mon Sep 17 00:00:00 2001 From: Giuseppe Bilotta Date: Tue, 13 Jan 2009 08:58:49 -0500 Subject: ALSA: hda - Add quirk for another HP dv5 Add the model=hp-m4 quirk for another HP dv5 (103c:3603) Reference: kernel bug#12440 http://bugzilla.kernel.org/show_bug.cgi?id=12440 Signed-off-by: Takashi Iwai Cc: stable@kernel.org --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index a4e2d8fcc8b..9acf2f0a2df 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -1736,6 +1736,8 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = { "HP dv7", STAC_HP_M4), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x30fc, "HP dv7", STAC_HP_M4), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3603, + "HP dv5", STAC_HP_M4), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x361a, "unknown HP", STAC_HP_M4), 
SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0233, -- cgit From 85b9e4fe13de9b35af1dbd50acc5f4978b9119ee Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 12 Jan 2009 14:37:44 -0500 Subject: mac80211: fix "‘ret’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/mac80211/ht.c: In function ‘ieee80211_start_tx_ba_session’: net/mac80211/ht.c:472: warning: ‘ret’ may be used uninitialized in this function Signed-off-by: John W. Linville --- net/mac80211/ht.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 5f510a13b9f..c5c0c527109 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -469,7 +469,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) struct ieee80211_sub_if_data *sdata; u16 start_seq_num; u8 *state; - int ret; + int ret = 0; if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; -- cgit From 922d8a0b6d82fb40ffb561576e3800c3784ff43d Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 12 Jan 2009 14:40:20 -0500 Subject: b43: fix "‘gmode’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/wireless/b43/main.c: In function ‘b43_op_config’: drivers/net/wireless/b43/main.c:3264: warning: ‘gmode’ may be used uninitialized Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 7b31a327b24..c788bad1066 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3261,7 +3261,7 @@ static int b43_switch_band(struct b43_wl *wl, struct ieee80211_channel *chan) struct b43_wldev *down_dev; struct b43_wldev *d; int err; - bool gmode; + bool uninitialized_var(gmode); int prev_status; /* Find a device and PHY which supports the band. */ -- cgit From 08cb7e01678b0a8d95d76aa0a395f2d7390f7ee1 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 12 Jan 2009 14:43:18 -0500 Subject: b43legacy: fix "‘up_dev’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/wireless/b43legacy/main.c: In function ‘b43legacy_op_dev_config’: drivers/net/wireless/b43legacy/main.c:2468: warning: ‘up_dev’ may be used uninitialized in this function Signed-off-by: John W. Linville --- drivers/net/wireless/b43legacy/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index c1324e31d2f..fb996c27a19 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -2465,7 +2465,7 @@ static void b43legacy_put_phy_into_reset(struct b43legacy_wldev *dev) static int b43legacy_switch_phymode(struct b43legacy_wl *wl, unsigned int new_mode) { - struct b43legacy_wldev *up_dev; + struct b43legacy_wldev *uninitialized_var(up_dev); struct b43legacy_wldev *down_dev; int err; bool gmode = 0; -- cgit From 25a4cceaa44a7f73c8f92e6177812347500a0b15 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Mon, 12 Jan 2009 14:44:52 -0500 Subject: iwl3945: fix "‘power_idx’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/wireless/iwlwifi/iwl-3945.c: In function ‘iwl3945_txpower_set_from_eeprom’: drivers/net/wireless/iwlwifi/iwl-3945.c:2222: warning: ‘power_idx’ may be used uninitialized in this function Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-3945.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/iwl-3945.c b/drivers/net/wireless/iwlwifi/iwl-3945.c index 8fdb34222c0..45cfa1cf194 100644 --- a/drivers/net/wireless/iwlwifi/iwl-3945.c +++ b/drivers/net/wireless/iwlwifi/iwl-3945.c @@ -2219,7 +2219,7 @@ int iwl3945_txpower_set_from_eeprom(struct iwl3945_priv *priv) /* set tx power value for all OFDM rates */ for (rate_index = 0; rate_index < IWL_OFDM_RATES; rate_index++) { - s32 power_idx; + s32 uninitialized_var(power_idx); int rc; /* use channel group's clip-power table, -- cgit From 26d1597c9a4532eec74f9651c4c96483cb8892fe Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Mon, 12 Jan 2009 14:46:39 -0500 Subject: p54: fix "‘ret’ may be used uninitialized" warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit drivers/net/wireless/p54/p54common.c: In function ‘p54_config’: drivers/net/wireless/p54/p54common.c:1853: warning: ‘ret’ may be used uninitialized in this function Signed-off-by: John W. 
Linville --- drivers/net/wireless/p54/p54common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/p54/p54common.c b/drivers/net/wireless/p54/p54common.c index cba89ed0f57..c6a370fa9bc 100644 --- a/drivers/net/wireless/p54/p54common.c +++ b/drivers/net/wireless/p54/p54common.c @@ -1850,7 +1850,7 @@ static void p54_remove_interface(struct ieee80211_hw *dev, static int p54_config(struct ieee80211_hw *dev, u32 changed) { - int ret; + int ret = 0; struct p54_common *priv = dev->priv; struct ieee80211_conf *conf = &dev->conf; -- cgit From 29bdccbee69c199910b2b39377e66ee5c33f241c Mon Sep 17 00:00:00 2001 From: Anirban Chakraborty Date: Thu, 8 Jan 2009 15:41:08 -0800 Subject: [SCSI] qla2xxx: Fix ISP restart bug in multiq code After restarting ISP the additional queues are not being setup correctly. The following patch fixes the issue. Please apply. Signed-off-by: Anirban Chakraborty Signed-off-by: James Bottomley --- drivers/scsi/qla2xxx/qla_init.c | 58 +++++++++++++++++++++++++---------------- drivers/scsi/qla2xxx/qla_mid.c | 1 + drivers/scsi/qla2xxx/qla_os.c | 7 +++-- 3 files changed, 40 insertions(+), 26 deletions(-) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 2d4f32b4df5..9ad4d0968e5 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -1258,35 +1258,48 @@ qla2x00_init_rings(scsi_qla_host_t *vha) { int rval; unsigned long flags = 0; - int cnt; + int cnt, que; struct qla_hw_data *ha = vha->hw; - struct req_que *req = ha->req_q_map[0]; - struct rsp_que *rsp = ha->rsp_q_map[0]; + struct req_que *req; + struct rsp_que *rsp; + struct scsi_qla_host *vp; struct mid_init_cb_24xx *mid_init_cb = (struct mid_init_cb_24xx *) ha->init_cb; spin_lock_irqsave(&ha->hardware_lock, flags); /* Clear outstanding commands array. 
*/ - for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) - req->outstanding_cmds[cnt] = NULL; + for (que = 0; que < ha->max_queues; que++) { + req = ha->req_q_map[que]; + if (!req) + continue; + for (cnt = 0; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) + req->outstanding_cmds[cnt] = NULL; - req->current_outstanding_cmd = 0; + req->current_outstanding_cmd = 0; - /* Clear RSCN queue. */ - vha->rscn_in_ptr = 0; - vha->rscn_out_ptr = 0; + /* Initialize firmware. */ + req->ring_ptr = req->ring; + req->ring_index = 0; + req->cnt = req->length; + } - /* Initialize firmware. */ - req->ring_ptr = req->ring; - req->ring_index = 0; - req->cnt = req->length; - rsp->ring_ptr = rsp->ring; - rsp->ring_index = 0; + for (que = 0; que < ha->max_queues; que++) { + rsp = ha->rsp_q_map[que]; + if (!rsp) + continue; + rsp->ring_ptr = rsp->ring; + rsp->ring_index = 0; - /* Initialize response queue entries */ - qla2x00_init_response_q_entries(rsp); + /* Initialize response queue entries */ + qla2x00_init_response_q_entries(rsp); + } + /* Clear RSCN queue. */ + list_for_each_entry(vp, &ha->vp_list, list) { + vp->rscn_in_ptr = 0; + vp->rscn_out_ptr = 0; + } ha->isp_ops->config_rings(vha); spin_unlock_irqrestore(&ha->hardware_lock, flags); @@ -3212,8 +3225,8 @@ qla2x00_loop_resync(scsi_qla_host_t *vha) int rval = QLA_SUCCESS; uint32_t wait_time; struct qla_hw_data *ha = vha->hw; - struct req_que *req = ha->req_q_map[0]; - struct rsp_que *rsp = ha->rsp_q_map[0]; + struct req_que *req = ha->req_q_map[vha->req_ques[0]]; + struct rsp_que *rsp = req->rsp; atomic_set(&vha->loop_state, LOOP_UPDATE); clear_bit(ISP_ABORT_RETRY, &vha->dpc_flags); @@ -3492,6 +3505,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) } req = ha->req_q_map[i]; if (req) { + /* Clear outstanding commands array. 
*/ req->options &= ~BIT_0; ret = qla25xx_init_req_que(base_vha, req, req->options); if (ret != QLA_SUCCESS) @@ -3500,7 +3514,7 @@ qla25xx_init_queues(struct qla_hw_data *ha) req->id)); else DEBUG2_17(printk(KERN_WARNING - "%s Rsp que:%d inited\n", __func__, + "%s Req que:%d inited\n", __func__, req->id)); } } @@ -4151,8 +4165,8 @@ qla24xx_configure_vhba(scsi_qla_host_t *vha) uint16_t mb[MAILBOX_REGISTER_COUNT]; struct qla_hw_data *ha = vha->hw; struct scsi_qla_host *base_vha = pci_get_drvdata(ha->pdev); - struct req_que *req = ha->req_q_map[0]; - struct rsp_que *rsp = ha->rsp_q_map[0]; + struct req_que *req = ha->req_q_map[vha->req_ques[0]]; + struct rsp_que *rsp = req->rsp; if (!vha->vp_idx) return -EINVAL; diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 886323130fc..f53179c4642 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -629,6 +629,7 @@ qla25xx_create_req_que(struct qla_hw_data *ha, uint16_t options, req->ring_index = 0; req->cnt = req->length; req->id = que_id; + req->max_q_depth = ha->req_q_map[0]->max_q_depth; mutex_unlock(&ha->vport_lock); ret = qla25xx_init_req_que(base_vha, req, options); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 4a71f522f92..cf32653fe01 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1158,8 +1158,8 @@ qla2x00_abort_all_cmds(scsi_qla_host_t *vha, int res) struct req_que *req; spin_lock_irqsave(&ha->hardware_lock, flags); - for (que = 0; que < QLA_MAX_HOST_QUES; que++) { - req = ha->req_q_map[vha->req_ques[que]]; + for (que = 0; que < ha->max_queues; que++) { + req = ha->req_q_map[que]; if (!req) continue; for (cnt = 1; cnt < MAX_OUTSTANDING_COMMANDS; cnt++) { @@ -1193,7 +1193,7 @@ qla2xxx_slave_configure(struct scsi_device *sdev) scsi_qla_host_t *vha = shost_priv(sdev->host); struct qla_hw_data *ha = vha->hw; struct fc_rport *rport = starget_to_rport(sdev->sdev_target); - struct req_que *req = 
ha->req_q_map[0]; + struct req_que *req = ha->req_q_map[vha->req_ques[0]]; if (sdev->tagged_supported) scsi_activate_tcq(sdev, req->max_q_depth); @@ -1998,7 +1998,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) return 0; probe_failed: - qla2x00_free_que(ha, req, rsp); qla2x00_free_device(base_vha); scsi_host_put(base_vha->host); -- cgit From fd7a253311412b3fc7c85586552c90eca61e7d23 Mon Sep 17 00:00:00 2001 From: "Kashyap, Desai" Date: Tue, 6 Jan 2009 14:56:31 +0530 Subject: [SCSI] mpt fusion: Update MPI Headers to version 01.05.19 This Patch is submitted to increment the MPI headers used by LSI MPT fusion drivers to the latest version 01.05.19. Year is changed in CopyRight. Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/lsi/mpi.h | 7 ++- drivers/message/fusion/lsi/mpi_cnfg.h | 47 +++++++++++++++- drivers/message/fusion/lsi/mpi_fc.h | 2 +- drivers/message/fusion/lsi/mpi_history.txt | 86 +++++++++++++++++++++--------- drivers/message/fusion/lsi/mpi_init.h | 2 +- drivers/message/fusion/lsi/mpi_ioc.h | 22 +++++++- drivers/message/fusion/lsi/mpi_lan.h | 2 +- drivers/message/fusion/lsi/mpi_log_fc.h | 2 +- drivers/message/fusion/lsi/mpi_log_sas.h | 37 ++++++++++--- drivers/message/fusion/lsi/mpi_raid.h | 11 +++- drivers/message/fusion/lsi/mpi_sas.h | 18 +++++-- drivers/message/fusion/lsi/mpi_targ.h | 2 +- drivers/message/fusion/lsi/mpi_tool.h | 2 +- drivers/message/fusion/lsi/mpi_type.h | 4 +- 14 files changed, 191 insertions(+), 53 deletions(-) diff --git a/drivers/message/fusion/lsi/mpi.h b/drivers/message/fusion/lsi/mpi.h index 10b6ef75872..11c0f461320 100644 --- a/drivers/message/fusion/lsi/mpi.h +++ b/drivers/message/fusion/lsi/mpi.h @@ -6,7 +6,7 @@ * Title: MPI Message independent structures and definitions * Creation Date: July 27, 2000 * - * mpi.h Version: 01.05.13 + * mpi.h Version: 01.05.16 * * Version History * --------------- @@ -79,6 +79,9 @@ * 03-27-06 01.05.11 Bumped 
MPI_HEADER_VERSION_UNIT. * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. + * 08-07-07 01.05.14 Bumped MPI_HEADER_VERSION_UNIT. + * 01-15-08 01.05.15 Bumped MPI_HEADER_VERSION_UNIT. + * 03-28-08 01.05.16 Bumped MPI_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- */ @@ -109,7 +112,7 @@ /* Note: The major versions of 0xe0 through 0xff are reserved */ /* versioning for this MPI header set */ -#define MPI_HEADER_VERSION_UNIT (0x10) +#define MPI_HEADER_VERSION_UNIT (0x13) #define MPI_HEADER_VERSION_DEV (0x00) #define MPI_HEADER_VERSION_UNIT_MASK (0xFF00) #define MPI_HEADER_VERSION_UNIT_SHIFT (8) diff --git a/drivers/message/fusion/lsi/mpi_cnfg.h b/drivers/message/fusion/lsi/mpi_cnfg.h index b2db3330c59..013c7d88194 100644 --- a/drivers/message/fusion/lsi/mpi_cnfg.h +++ b/drivers/message/fusion/lsi/mpi_cnfg.h @@ -6,7 +6,7 @@ * Title: MPI Config message, structures, and Pages * Creation Date: July 27, 2000 * - * mpi_cnfg.h Version: 01.05.15 + * mpi_cnfg.h Version: 01.05.18 * * Version History * --------------- @@ -308,6 +308,20 @@ * Expander Page 0 Flags field. * Fixed define for * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. + * 08-07-07 01.05.16 Added MPI_IOCPAGE6_CAP_FLAGS_MULTIPORT_DRIVE_SUPPORT + * define. + * Added BIOS Page 4 structure. + * Added MPI_RAID_PHYS_DISK1_PATH_MAX define for RAID + * Physcial Disk Page 1. + * 01-15-07 01.05.17 Added additional bit defines for ExtFlags field of + * Manufacturing Page 4. + * Added Solid State Drives Supported bit to IOC Page 6 + * Capabilities Flags. + * Added new value for AccessStatus field of SAS Device + * Page 0 (_SATA_NEEDS_INITIALIZATION). + * 03-28-08 01.05.18 Defined new bits in Manufacturing Page 4 ExtFlags field + * to control coercion size and the mixing of SAS and SATA + * SSD drives. 
* -------------------------------------------------------------------------- */ @@ -686,6 +700,14 @@ typedef struct _CONFIG_PAGE_MANUFACTURING_4 #define MPI_MANPAGE4_IR_NO_MIX_SAS_SATA (0x01) /* defines for the ExtFlags field */ +#define MPI_MANPAGE4_EXTFLAGS_MASK_COERCION_SIZE (0x0180) +#define MPI_MANPAGE4_EXTFLAGS_SHIFT_COERCION_SIZE (7) +#define MPI_MANPAGE4_EXTFLAGS_1GB_COERCION_SIZE (0) +#define MPI_MANPAGE4_EXTFLAGS_128MB_COERCION_SIZE (1) + +#define MPI_MANPAGE4_EXTFLAGS_NO_MIX_SSD_SAS_SATA (0x0040) +#define MPI_MANPAGE4_EXTFLAGS_MIX_SSD_AND_NON_SSD (0x0020) +#define MPI_MANPAGE4_EXTFLAGS_DUAL_PORT_SUPPORT (0x0010) #define MPI_MANPAGE4_EXTFLAGS_HIDE_NON_IR_METADATA (0x0008) #define MPI_MANPAGE4_EXTFLAGS_SAS_CACHE_DISABLE (0x0004) #define MPI_MANPAGE4_EXTFLAGS_SATA_CACHE_DISABLE (0x0002) @@ -1159,6 +1181,8 @@ typedef struct _CONFIG_PAGE_IOC_6 /* IOC Page 6 Capabilities Flags */ +#define MPI_IOCPAGE6_CAP_FLAGS_SSD_SUPPORT (0x00000020) +#define MPI_IOCPAGE6_CAP_FLAGS_MULTIPORT_DRIVE_SUPPORT (0x00000010) #define MPI_IOCPAGE6_CAP_FLAGS_DISABLE_SMART_POLLING (0x00000008) #define MPI_IOCPAGE6_CAP_FLAGS_MASK_METADATA_SIZE (0x00000006) @@ -1428,6 +1452,15 @@ typedef struct _CONFIG_PAGE_BIOS_2 #define MPI_BIOSPAGE2_FORM_SAS_WWN (0x05) #define MPI_BIOSPAGE2_FORM_ENCLOSURE_SLOT (0x06) +typedef struct _CONFIG_PAGE_BIOS_4 +{ + CONFIG_PAGE_HEADER Header; /* 00h */ + U64 ReassignmentBaseWWID; /* 04h */ +} CONFIG_PAGE_BIOS_4, MPI_POINTER PTR_CONFIG_PAGE_BIOS_4, + BIOSPage4_t, MPI_POINTER pBIOSPage4_t; + +#define MPI_BIOSPAGE4_PAGEVERSION (0x00) + /**************************************************************************** * SCSI Port Config Pages @@ -2419,6 +2452,15 @@ typedef struct _RAID_PHYS_DISK1_PATH #define MPI_RAID_PHYSDISK1_FLAG_BROKEN (0x0002) #define MPI_RAID_PHYSDISK1_FLAG_INVALID (0x0001) + +/* + * Host code (drivers, BIOS, utilities, etc.) should leave this define set to + * one and check Header.PageLength or NumPhysDiskPaths at runtime. 
+ */ +#ifndef MPI_RAID_PHYS_DISK1_PATH_MAX +#define MPI_RAID_PHYS_DISK1_PATH_MAX (1) +#endif + typedef struct _CONFIG_PAGE_RAID_PHYS_DISK_1 { CONFIG_PAGE_HEADER Header; /* 00h */ @@ -2426,7 +2468,7 @@ typedef struct _CONFIG_PAGE_RAID_PHYS_DISK_1 U8 PhysDiskNum; /* 05h */ U16 Reserved2; /* 06h */ U32 Reserved1; /* 08h */ - RAID_PHYS_DISK1_PATH Path[1]; /* 0Ch */ + RAID_PHYS_DISK1_PATH Path[MPI_RAID_PHYS_DISK1_PATH_MAX];/* 0Ch */ } CONFIG_PAGE_RAID_PHYS_DISK_1, MPI_POINTER PTR_CONFIG_PAGE_RAID_PHYS_DISK_1, RaidPhysDiskPage1_t, MPI_POINTER pRaidPhysDiskPage1_t; @@ -2844,6 +2886,7 @@ typedef struct _CONFIG_PAGE_SAS_DEVICE_0 #define MPI_SAS_DEVICE0_ASTATUS_SATA_INIT_FAILED (0x01) #define MPI_SAS_DEVICE0_ASTATUS_SATA_CAPABILITY_FAILED (0x02) #define MPI_SAS_DEVICE0_ASTATUS_SATA_AFFILIATION_CONFLICT (0x03) +#define MPI_SAS_DEVICE0_ASTATUS_SATA_NEEDS_INITIALIZATION (0x04) /* specific values for SATA Init failures */ #define MPI_SAS_DEVICE0_ASTATUS_SIF_UNKNOWN (0x10) #define MPI_SAS_DEVICE0_ASTATUS_SIF_AFFILIATION_CONFLICT (0x11) diff --git a/drivers/message/fusion/lsi/mpi_fc.h b/drivers/message/fusion/lsi/mpi_fc.h index 627acfbb862..7d663ce76f8 100644 --- a/drivers/message/fusion/lsi/mpi_fc.h +++ b/drivers/message/fusion/lsi/mpi_fc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 LSI Corporation. + * Copyright (c) 2000-2008 LSI Corporation. * * * Name: mpi_fc.h diff --git a/drivers/message/fusion/lsi/mpi_history.txt b/drivers/message/fusion/lsi/mpi_history.txt index 3f15fcfe4a2..693e4b51135 100644 --- a/drivers/message/fusion/lsi/mpi_history.txt +++ b/drivers/message/fusion/lsi/mpi_history.txt @@ -3,28 +3,28 @@ MPI Header File Change History ============================== - Copyright (c) 2000-2007 LSI Corporation. + Copyright (c) 2000-2008 LSI Corporation. 
--------------------------------------- - Header Set Release Version: 01.05.16 - Header Set Release Date: 05-24-07 + Header Set Release Version: 01.05.19 + Header Set Release Date: 03-28-08 --------------------------------------- Filename Current version Prior version ---------- --------------- ------------- - mpi.h 01.05.13 01.05.12 - mpi_ioc.h 01.05.14 01.05.13 - mpi_cnfg.h 01.05.15 01.05.14 + mpi.h 01.05.16 01.05.15 + mpi_ioc.h 01.05.16 01.05.15 + mpi_cnfg.h 01.05.18 01.05.17 mpi_init.h 01.05.09 01.05.09 mpi_targ.h 01.05.06 01.05.06 mpi_fc.h 01.05.01 01.05.01 mpi_lan.h 01.05.01 01.05.01 - mpi_raid.h 01.05.03 01.05.03 + mpi_raid.h 01.05.05 01.05.05 mpi_tool.h 01.05.03 01.05.03 mpi_inb.h 01.05.01 01.05.01 - mpi_sas.h 01.05.04 01.05.04 + mpi_sas.h 01.05.05 01.05.05 mpi_type.h 01.05.02 01.05.02 - mpi_history.txt 01.05.14 01.05.14 + mpi_history.txt 01.05.19 01.05.18 * Date Version Description @@ -96,6 +96,9 @@ mpi.h * 03-27-06 01.05.11 Bumped MPI_HEADER_VERSION_UNIT. * 10-11-06 01.05.12 Bumped MPI_HEADER_VERSION_UNIT. * 05-24-07 01.05.13 Bumped MPI_HEADER_VERSION_UNIT. + * 08-07-07 01.05.14 Bumped MPI_HEADER_VERSION_UNIT. + * 01-15-08 01.05.15 Bumped MPI_HEADER_VERSION_UNIT. + * 03-28-08 01.05.16 Bumped MPI_HEADER_VERSION_UNIT. * -------------------------------------------------------------------------- mpi_ioc.h @@ -127,7 +130,7 @@ mpi_ioc.h * 08-08-01 01.02.01 Original release for v1.2 work. * New format for FWVersion and ProductId in * MSG_IOC_FACTS_REPLY and MPI_FW_HEADER. - * 08-31-01 01.02.02 Added event MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE and + * 08-31-01 01.02.02 Addded event MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE and * related structure and defines. * Added event MPI_EVENT_ON_BUS_TIMER_EXPIRED. * Added MPI_IOCINIT_FLAGS_DISCARD_FW_IMAGE. @@ -187,7 +190,7 @@ mpi_ioc.h * 10-11-06 01.05.12 Added MPI_IOCFACTS_EXCEPT_METADATA_UNSUPPORTED. * Added MaxInitiators field to PortFacts reply. * Added SAS Device Status Change ReasonCode for - * asynchronous notification. 
+ * asynchronous notificaiton. * Added MPI_EVENT_SAS_EXPANDER_STATUS_CHANGE and event * data structure. * Added new ImageType values for FWDownload and FWUpload @@ -199,6 +202,16 @@ mpi_ioc.h * added _MULTI_PORT_DOMAIN. * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. * Added Common Boot Block type to FWUpload Request. + * 08-07-07 01.05.15 Added MPI_EVENT_SAS_INIT_RC_REMOVED define. + * Added MPI_EVENT_IR2_RC_DUAL_PORT_ADDED and + * MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED for IR2 event data. + * Added SASAddress field to SAS Initiator Device Table + * Overflow event data structure. + * 03-28-08 01.05.16 Added two new ReasonCode values to SAS Device Status + * Change Event data to indicate completion of internally + * generated task management. + * Added MPI_EVENT_DSCVRY_ERR_DS_SATA_INIT_FAILURE define. + * Added MPI_EVENT_SAS_INIT_RC_INACCESSIBLE define. * -------------------------------------------------------------------------- mpi_cnfg.h @@ -213,7 +226,7 @@ mpi_cnfg.h * Added _RESPONSE_ID_MASK definition to SCSI_PORT_1 * page and updated the page version. * Added Information field and _INFO_PARAMS_NEGOTIATED - * definition to SCSI_DEVICE_0 page. + * definitionto SCSI_DEVICE_0 page. * 06-22-00 01.00.03 Removed batch controls from LAN_0 page and updated the * page version. * Added BucketsRemaining to LAN_1 page, redefined the @@ -496,6 +509,20 @@ mpi_cnfg.h * Expander Page 0 Flags field. * Fixed define for * MPI_SAS_EXPANDER1_DISCINFO_BAD_PHY_DISABLED. + * 08-07-07 01.05.16 Added MPI_IOCPAGE6_CAP_FLAGS_MULTIPORT_DRIVE_SUPPORT + * define. + * Added BIOS Page 4 structure. + * Added MPI_RAID_PHYS_DISK1_PATH_MAX define for RAID + * Physcial Disk Page 1. + * 01-15-07 01.05.17 Added additional bit defines for ExtFlags field of + * Manufacturing Page 4. + * Added Solid State Drives Supported bit to IOC Page 6 + * Capabilities Flags. + * Added new value for AccessStatus field of SAS Device + * Page 0 (_SATA_NEEDS_INITIALIZATION). 
+ * 03-28-08 01.05.18 Defined new bits in Manufacturing Page 4 ExtFlags field + * to control coercion size and the mixing of SAS and SATA + * SSD drives. * -------------------------------------------------------------------------- mpi_init.h @@ -661,6 +688,9 @@ mpi_raid.h * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and * associated defines. + * 08-07-07 01.05.04 Added Disable Full Rebuild bit to the ActionDataWord + * for the RAID Action MPI_RAID_ACTION_DISABLE_VOLUME. + * 01-15-08 01.05.05 Added define for MPI_RAID_ACTION_SET_VOLUME_NAME. * -------------------------------------------------------------------------- mpi_tool.h @@ -694,6 +724,10 @@ mpi_sas.h * reply. * 10-11-06 01.05.04 Fixed the name of a define for Operation field of SAS IO * Unit Control request. + * 01-15-08 01.05.05 Added support for MPI_SAS_OP_SET_IOC_PARAMETER, + * including adding IOCParameter and IOCParameter value + * fields to SAS IO Unit Control Request. + * Added MPI_SAS_DEVICE_INFO_PRODUCT_SPECIFIC define. 
* -------------------------------------------------------------------------- mpi_type.h @@ -709,20 +743,20 @@ mpi_type.h mpi_history.txt Parts list history -Filename 01.05.15 01.05.15 ----------- -------- -------- -mpi.h 01.05.12 01.05.13 -mpi_ioc.h 01.05.13 01.05.14 -mpi_cnfg.h 01.05.14 01.05.15 -mpi_init.h 01.05.09 01.05.09 -mpi_targ.h 01.05.06 01.05.06 -mpi_fc.h 01.05.01 01.05.01 -mpi_lan.h 01.05.01 01.05.01 -mpi_raid.h 01.05.03 01.05.03 -mpi_tool.h 01.05.03 01.05.03 -mpi_inb.h 01.05.01 01.05.01 -mpi_sas.h 01.05.04 01.05.04 -mpi_type.h 01.05.02 01.05.02 +Filename 01.05.19 01.05.18 01.05.17 01.05.16 01.05.15 +---------- -------- -------- -------- -------- -------- +mpi.h 01.05.16 01.05.15 01.05.14 01.05.13 01.05.12 +mpi_ioc.h 01.05.16 01.05.15 01.05.15 01.05.14 01.05.13 +mpi_cnfg.h 01.05.18 01.05.17 01.05.16 01.05.15 01.05.14 +mpi_init.h 01.05.09 01.05.09 01.05.09 01.05.09 01.05.09 +mpi_targ.h 01.05.06 01.05.06 01.05.06 01.05.06 01.05.06 +mpi_fc.h 01.05.01 01.05.01 01.05.01 01.05.01 01.05.01 +mpi_lan.h 01.05.01 01.05.01 01.05.01 01.05.01 01.05.01 +mpi_raid.h 01.05.05 01.05.05 01.05.04 01.05.03 01.05.03 +mpi_tool.h 01.05.03 01.05.03 01.05.03 01.05.03 01.05.03 +mpi_inb.h 01.05.01 01.05.01 01.05.01 01.05.01 01.05.01 +mpi_sas.h 01.05.05 01.05.05 01.05.04 01.05.04 01.05.04 +mpi_type.h 01.05.02 01.05.02 01.05.02 01.05.02 01.05.02 Filename 01.05.14 01.05.13 01.05.12 01.05.11 01.05.10 01.05.09 ---------- -------- -------- -------- -------- -------- -------- diff --git a/drivers/message/fusion/lsi/mpi_init.h b/drivers/message/fusion/lsi/mpi_init.h index a9e3693601a..4295d062caa 100644 --- a/drivers/message/fusion/lsi/mpi_init.h +++ b/drivers/message/fusion/lsi/mpi_init.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 LSI Corporation. + * Copyright (c) 2000-2008 LSI Corporation. 
* * * Name: mpi_init.h diff --git a/drivers/message/fusion/lsi/mpi_ioc.h b/drivers/message/fusion/lsi/mpi_ioc.h index 5cbb6bd048e..8faa4fab7b8 100644 --- a/drivers/message/fusion/lsi/mpi_ioc.h +++ b/drivers/message/fusion/lsi/mpi_ioc.h @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2007 LSI Corporation. + * Copyright (c) 2000-2008 LSI Corporation. * * * Name: mpi_ioc.h * Title: MPI IOC, Port, Event, FW Download, and FW Upload messages * Creation Date: August 11, 2000 * - * mpi_ioc.h Version: 01.05.14 + * mpi_ioc.h Version: 01.05.16 * * Version History * --------------- @@ -113,6 +113,16 @@ * added _MULTI_PORT_DOMAIN. * 05-24-07 01.05.14 Added Common Boot Block type to FWDownload Request. * Added Common Boot Block type to FWUpload Request. + * 08-07-07 01.05.15 Added MPI_EVENT_SAS_INIT_RC_REMOVED define. + * Added MPI_EVENT_IR2_RC_DUAL_PORT_ADDED and + * MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED for IR2 event data. + * Added SASAddress field to SAS Initiator Device Table + * Overflow event data structure. + * 03-28-08 01.05.16 Added two new ReasonCode values to SAS Device Status + * Change Event data to indicate completion of internally + * generated task management. + * Added MPI_EVENT_DSCVRY_ERR_DS_SATA_INIT_FAILURE define. + * Added MPI_EVENT_SAS_INIT_RC_INACCESSIBLE define. 
* -------------------------------------------------------------------------- */ @@ -612,6 +622,8 @@ typedef struct _EVENT_DATA_SAS_DEVICE_STATUS_CHANGE #define MPI_EVENT_SAS_DEV_STAT_RC_CLEAR_TASK_SET_INTERNAL (0x0B) #define MPI_EVENT_SAS_DEV_STAT_RC_QUERY_TASK_INTERNAL (0x0C) #define MPI_EVENT_SAS_DEV_STAT_RC_ASYNC_NOTIFICATION (0x0D) +#define MPI_EVENT_SAS_DEV_STAT_RC_CMPL_INTERNAL_DEV_RESET (0x0E) +#define MPI_EVENT_SAS_DEV_STAT_RC_CMPL_TASK_ABORT_INTERNAL (0x0F) /* SCSI Event data for Queue Full event */ @@ -708,6 +720,8 @@ typedef struct _MPI_EVENT_DATA_IR2 #define MPI_EVENT_IR2_RC_PD_REMOVED (0x05) #define MPI_EVENT_IR2_RC_FOREIGN_CFG_DETECTED (0x06) #define MPI_EVENT_IR2_RC_REBUILD_MEDIUM_ERROR (0x07) +#define MPI_EVENT_IR2_RC_DUAL_PORT_ADDED (0x08) +#define MPI_EVENT_IR2_RC_DUAL_PORT_REMOVED (0x09) /* defines for logical disk states */ #define MPI_LD_STATE_OPTIMAL (0x00) @@ -867,6 +881,7 @@ typedef struct _EVENT_DATA_DISCOVERY_ERROR #define MPI_EVENT_DSCVRY_ERR_DS_UNSUPPORTED_DEVICE (0x00000800) #define MPI_EVENT_DSCVRY_ERR_DS_MAX_SATA_TARGETS (0x00001000) #define MPI_EVENT_DSCVRY_ERR_DS_MULTI_PORT_DOMAIN (0x00002000) +#define MPI_EVENT_DSCVRY_ERR_DS_SATA_INIT_FAILURE (0x00004000) /* SAS SMP Error Event data */ @@ -902,6 +917,8 @@ typedef struct _EVENT_DATA_SAS_INIT_DEV_STATUS_CHANGE /* defines for the ReasonCode field of the SAS Initiator Device Status Change event */ #define MPI_EVENT_SAS_INIT_RC_ADDED (0x01) +#define MPI_EVENT_SAS_INIT_RC_REMOVED (0x02) +#define MPI_EVENT_SAS_INIT_RC_INACCESSIBLE (0x03) /* SAS Initiator Device Table Overflow Event data */ @@ -910,6 +927,7 @@ typedef struct _EVENT_DATA_SAS_INIT_TABLE_OVERFLOW U8 MaxInit; /* 00h */ U8 CurrentInit; /* 01h */ U16 Reserved1; /* 02h */ + U64 SASAddress; /* 04h */ } EVENT_DATA_SAS_INIT_TABLE_OVERFLOW, MPI_POINTER PTR_EVENT_DATA_SAS_INIT_TABLE_OVERFLOW, MpiEventDataSasInitTableOverflow_t, diff --git a/drivers/message/fusion/lsi/mpi_lan.h b/drivers/message/fusion/lsi/mpi_lan.h index 
03253b53b78..f41fcb69b35 100644 --- a/drivers/message/fusion/lsi/mpi_lan.h +++ b/drivers/message/fusion/lsi/mpi_lan.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 LSI Corporation. + * Copyright (c) 2000-2008 LSI Corporation. * * * Name: mpi_lan.h diff --git a/drivers/message/fusion/lsi/mpi_log_fc.h b/drivers/message/fusion/lsi/mpi_log_fc.h index e4dafcefeec..face6e7acc7 100644 --- a/drivers/message/fusion/lsi/mpi_log_fc.h +++ b/drivers/message/fusion/lsi/mpi_log_fc.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2001 LSI Corporation. All rights reserved. + * Copyright (c) 2000-2008 LSI Corporation. All rights reserved. * * NAME: fc_log.h * SUMMARY: MPI IocLogInfo definitions for the SYMFC9xx chips diff --git a/drivers/message/fusion/lsi/mpi_log_sas.h b/drivers/message/fusion/lsi/mpi_log_sas.h index af9da03e95e..691620dbedd 100644 --- a/drivers/message/fusion/lsi/mpi_log_sas.h +++ b/drivers/message/fusion/lsi/mpi_log_sas.h @@ -1,6 +1,6 @@ /*************************************************************************** * * - * Copyright 2003 LSI Corporation. All rights reserved. * + * Copyright (c) 2000-2008 LSI Corporation. All rights reserved. 
* * * * Description * * ------------ * @@ -73,6 +73,8 @@ #define IOP_LOGINFO_CODE_TARGET_MODE_ABORT_EXACT_IO (0x00070004) #define IOP_LOGINFO_CODE_TARGET_MODE_ABORT_EXACT_IO_REQ (0x00070005) +#define IOP_LOGINFO_CODE_LOG_TIMESTAMP_EVENT (0x00080000) + /****************************************************************************/ /* PL LOGINFO_CODE defines, valid if IOC_LOGINFO_ORIGINATOR = PL */ /****************************************************************************/ @@ -92,7 +94,7 @@ #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_OPEN_TIMEOUT_EXP (0x0000000C) #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_UNUSED_0D (0x0000000D) #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_DVTBLE_ACCSS_FAIL (0x0000000E) -#define PL_LOGINFO_SUB CODE_OPEN_FAIL_BAD_DEST (0x00000011) +#define PL_LOGINFO_SUB_CODE_OPEN_FAIL_BAD_DEST (0x00000011) #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_RATE_NOT_SUPP (0x00000012) #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_PROT_NOT_SUPP (0x00000013) #define PL_LOGINFO_SUB_CODE_OPEN_FAIL_RESERVED_ABANDON0 (0x00000014) @@ -159,10 +161,11 @@ #define PL_LOGINFO_SUB_CODE_INVALID_SGL (0x00000200) #define PL_LOGINFO_SUB_CODE_WRONG_REL_OFF_OR_FRAME_LENGTH (0x00000300) -#define PL_LOGINFO_SUB_CODE_FRAME_XFER_ERROR (0x00000400) /* Bits 0-3 encode Transport Status Register (offset 0x08) */ - /* Bit 0 is Status Bit 0: FrameXferErr */ - /* Bit 1 & 2 are Status Bits 16 and 17: FrameXmitErrStatus */ - /* Bit 3 is Status Bit 18 WriteDataLengthGTDataLengthErr */ +#define PL_LOGINFO_SUB_CODE_FRAME_XFER_ERROR (0x00000400) +/* Bits 0-3 encode Transport Status Register (offset 0x08) */ +/* Bit 0 is Status Bit 0: FrameXferErr */ +/* Bit 1 & 2 are Status Bits 16 and 17: FrameXmitErrStatus */ +/* Bit 3 is Status Bit 18 WriteDataLenghtGTDataLengthErr */ #define PL_LOGINFO_SUB_CODE_TX_FM_CONNECTED_LOW (0x00000500) #define PL_LOGINFO_SUB_CODE_SATA_NON_NCQ_RW_ERR_BIT_SET (0x00000600) @@ -177,6 +180,11 @@ #define PL_LOGINFO_SUB_CODE_DISCOVERY_REMOTE_SEP_RESET (0x00000E01) #define PL_LOGINFO_SUB_CODE_SECOND_OPEN 
(0x00000F00) #define PL_LOGINFO_SUB_CODE_DSCVRY_SATA_INIT_TIMEOUT (0x00001000) +#define PL_LOGINFO_SUB_CODE_BREAK_ON_SATA_CONNECTION (0x00002000) +/* not currently used in mainline */ +#define PL_LOGINFO_SUB_CODE_BREAK_ON_STUCK_LINK (0x00003000) +#define PL_LOGINFO_SUB_CODE_BREAK_ON_STUCK_LINK_AIP (0x00004000) +#define PL_LOGINFO_SUB_CODE_BREAK_ON_INCOMPLETE_BREAK_RCVD (0x00005000) #define PL_LOGINFO_CODE_ENCL_MGMT_SMP_FRAME_FAILURE (0x00200000) /* Can't get SMP Frame */ #define PL_LOGINFO_CODE_ENCL_MGMT_SMP_READ_ERROR (0x00200010) /* Error occured on SMP Read */ @@ -243,6 +251,8 @@ #define IR_LOGINFO_VOLUME_ACTIVATE_VOLUME_FAILED (0x00010014) /* Activation failed trying to import the volume */ #define IR_LOGINFO_VOLUME_ACTIVATING_IMPORT_VOLUME_FAILED (0x00010015) +/* Activation failed trying to import the volume */ +#define IR_LOGINFO_VOLUME_ACTIVATING_TOO_MANY_PHYS_DISKS (0x00010016) /* Phys Disk failed, too many phys disks */ #define IR_LOGINFO_PHYSDISK_CREATE_TOO_MANY_DISKS (0x00010020) @@ -285,6 +295,21 @@ /* Compatibility Error : IME size limited to < 2TB */ #define IR_LOGINFO_COMPAT_ERROR_IME_VOL_NOT_CURRENTLY_SUPPORTED (0x0001003D) +/* Device Firmware Update: DFU can only be started once */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_DFU_IN_PROGRESS (0x00010050) +/* Device Firmware Update: Volume must be Optimal/Active/non-Quiesced */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_DEVICE_IN_INVALID_STATE (0x00010051) +/* Device Firmware Update: DFU Timeout cannot be zero */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_INVALID_TIMEOUT (0x00010052) +/* Device Firmware Update: CREATE TIMER FAILED */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_NO_TIMERS (0x00010053) +/* Device Firmware Update: Failed to read SAS_IO_UNIT_PG_1 */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_READING_CFG_PAGE (0x00010054) +/* Device Firmware Update: Invalid SAS_IO_UNIT_PG_1 value(s) */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_PORT_IO_TIMEOUTS_REQUIRED (0x00010055) +/* Device Firmware Update: Unable to allocate memory 
for page */ +#define IR_LOGINFO_DEV_FW_UPDATE_ERR_ALLOC_CFG_PAGE (0x00010056) + /****************************************************************************/ /* Defines for convenience */ diff --git a/drivers/message/fusion/lsi/mpi_raid.h b/drivers/message/fusion/lsi/mpi_raid.h index 2856108421d..add60cc85be 100644 --- a/drivers/message/fusion/lsi/mpi_raid.h +++ b/drivers/message/fusion/lsi/mpi_raid.h @@ -1,12 +1,12 @@ /* - * Copyright (c) 2001-2007 LSI Corporation. + * Copyright (c) 2001-2008 LSI Corporation. * * * Name: mpi_raid.h * Title: MPI RAID message and structures * Creation Date: February 27, 2001 * - * mpi_raid.h Version: 01.05.03 + * mpi_raid.h Version: 01.05.05 * * Version History * --------------- @@ -34,6 +34,9 @@ * _SET_RESYNC_RATE and _SET_DATA_SCRUB_RATE. * 02-28-07 01.05.03 Added new RAID Action, Device FW Update Mode, and * associated defines. + * 08-07-07 01.05.04 Added Disable Full Rebuild bit to the ActionDataWord + * for the RAID Action MPI_RAID_ACTION_DISABLE_VOLUME. + * 01-15-08 01.05.05 Added define for MPI_RAID_ACTION_SET_VOLUME_NAME. 
* -------------------------------------------------------------------------- */ @@ -93,6 +96,7 @@ typedef struct _MSG_RAID_ACTION #define MPI_RAID_ACTION_SET_RESYNC_RATE (0x13) #define MPI_RAID_ACTION_SET_DATA_SCRUB_RATE (0x14) #define MPI_RAID_ACTION_DEVICE_FW_UPDATE_MODE (0x15) +#define MPI_RAID_ACTION_SET_VOLUME_NAME (0x16) /* ActionDataWord defines for use with MPI_RAID_ACTION_CREATE_VOLUME action */ #define MPI_RAID_ACTION_ADATA_DO_NOT_SYNC (0x00000001) @@ -105,6 +109,9 @@ typedef struct _MSG_RAID_ACTION #define MPI_RAID_ACTION_ADATA_KEEP_LBA0 (0x00000000) #define MPI_RAID_ACTION_ADATA_ZERO_LBA0 (0x00000002) +/* ActionDataWord defines for use with MPI_RAID_ACTION_DISABLE_VOLUME action */ +#define MPI_RAID_ACTION_ADATA_DISABLE_FULL_REBUILD (0x00000001) + /* ActionDataWord defines for use with MPI_RAID_ACTION_ACTIVATE_VOLUME action */ #define MPI_RAID_ACTION_ADATA_INACTIVATE_ALL (0x00000001) diff --git a/drivers/message/fusion/lsi/mpi_sas.h b/drivers/message/fusion/lsi/mpi_sas.h index 33fca83cefc..ab410036bbf 100644 --- a/drivers/message/fusion/lsi/mpi_sas.h +++ b/drivers/message/fusion/lsi/mpi_sas.h @@ -1,12 +1,12 @@ /* - * Copyright (c) 2004-2006 LSI Corporation. + * Copyright (c) 2004-2008 LSI Corporation. * * * Name: mpi_sas.h * Title: MPI Serial Attached SCSI structures and definitions * Creation Date: August 19, 2004 * - * mpi_sas.h Version: 01.05.04 + * mpi_sas.h Version: 01.05.05 * * Version History * --------------- @@ -23,6 +23,10 @@ * reply. * 10-11-06 01.05.04 Fixed the name of a define for Operation field of SAS IO * Unit Control request. + * 01-15-08 01.05.05 Added support for MPI_SAS_OP_SET_IOC_PARAMETER, + * including adding IOCParameter and IOCParameter value + * fields to SAS IO Unit Control Request. + * Added MPI_SAS_DEVICE_INFO_PRODUCT_SPECIFIC define. 
* -------------------------------------------------------------------------- */ @@ -60,6 +64,8 @@ * Values for the SAS DeviceInfo field used in SAS Device Status Change Event * data and SAS IO Unit Configuration pages. */ +#define MPI_SAS_DEVICE_INFO_PRODUCT_SPECIFIC (0xF0000000) + #define MPI_SAS_DEVICE_INFO_SEP (0x00004000) #define MPI_SAS_DEVICE_INFO_ATAPI_DEVICE (0x00002000) #define MPI_SAS_DEVICE_INFO_LSI_DEVICE (0x00001000) @@ -216,7 +222,7 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REQUEST U8 ChainOffset; /* 02h */ U8 Function; /* 03h */ U16 DevHandle; /* 04h */ - U8 Reserved3; /* 06h */ + U8 IOCParameter; /* 06h */ U8 MsgFlags; /* 07h */ U32 MsgContext; /* 08h */ U8 TargetID; /* 0Ch */ @@ -225,7 +231,7 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REQUEST U8 PrimFlags; /* 0Fh */ U32 Primitive; /* 10h */ U64 SASAddress; /* 14h */ - U32 Reserved4; /* 1Ch */ + U32 IOCParameterValue; /* 1Ch */ } MSG_SAS_IOUNIT_CONTROL_REQUEST, MPI_POINTER PTR_MSG_SAS_IOUNIT_CONTROL_REQUEST, SasIoUnitControlRequest_t, MPI_POINTER pSasIoUnitControlRequest_t; @@ -241,6 +247,8 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REQUEST #define MPI_SAS_OP_TRANSMIT_PORT_SELECT_SIGNAL (0x0C) #define MPI_SAS_OP_TRANSMIT_REMOVE_DEVICE (0x0D) /* obsolete name */ #define MPI_SAS_OP_REMOVE_DEVICE (0x0D) +#define MPI_SAS_OP_SET_IOC_PARAMETER (0x0E) +#define MPI_SAS_OP_PRODUCT_SPECIFIC_MIN (0x80) /* values for the PrimFlags field */ #define MPI_SAS_PRIMFLAGS_SINGLE (0x08) @@ -256,7 +264,7 @@ typedef struct _MSG_SAS_IOUNIT_CONTROL_REPLY U8 MsgLength; /* 02h */ U8 Function; /* 03h */ U16 DevHandle; /* 04h */ - U8 Reserved3; /* 06h */ + U8 IOCParameter; /* 06h */ U8 MsgFlags; /* 07h */ U32 MsgContext; /* 08h */ U16 Reserved4; /* 0Ch */ diff --git a/drivers/message/fusion/lsi/mpi_targ.h b/drivers/message/fusion/lsi/mpi_targ.h index ff8c37d3fdc..c3dea7f6909 100644 --- a/drivers/message/fusion/lsi/mpi_targ.h +++ b/drivers/message/fusion/lsi/mpi_targ.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2004 LSI 
Corporation. + * Copyright (c) 2000-2008 LSI Corporation. * * * Name: mpi_targ.h diff --git a/drivers/message/fusion/lsi/mpi_tool.h b/drivers/message/fusion/lsi/mpi_tool.h index 8834ae6ce0f..53cd715aa7e 100644 --- a/drivers/message/fusion/lsi/mpi_tool.h +++ b/drivers/message/fusion/lsi/mpi_tool.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2001-2005 LSI Corporation. + * Copyright (c) 2001-2008 LSI Corporation. * * * Name: mpi_tool.h diff --git a/drivers/message/fusion/lsi/mpi_type.h b/drivers/message/fusion/lsi/mpi_type.h index 08dad9c1e44..888b26dbc41 100644 --- a/drivers/message/fusion/lsi/mpi_type.h +++ b/drivers/message/fusion/lsi/mpi_type.h @@ -1,12 +1,12 @@ /* - * Copyright (c) 2000-2004 LSI Corporation. + * Copyright (c) 2000-2008 LSI Corporation. * * * Name: mpi_type.h * Title: MPI Basic type definitions * Creation Date: June 6, 2000 * - * mpi_type.h Version: 01.05.01 + * mpi_type.h Version: 01.05.02 * * Version History * --------------- -- cgit From e382968ba618e016ff7922dff9a6140c2f9108c8 Mon Sep 17 00:00:00 2001 From: "Kashyap, Desai" Date: Thu, 8 Jan 2009 14:27:16 +0530 Subject: [SCSI] mpt fusion: Add separate msi enable disable for FC,SPI,SAS Added separate MSI enable/disable support for each bus type (FC, SPI, SAS) instead of having a single MSI enable/disable setting for all of them.
Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/mptbase.c | 50 +++++++++++++++++++++++++++++----------- 1 file changed, 37 insertions(+), 13 deletions(-) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index c4e8b9aa382..787a12648db 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -79,9 +79,22 @@ MODULE_VERSION(my_VERSION); /* * cmd line parameters */ -static int mpt_msi_enable = -1; -module_param(mpt_msi_enable, int, 0); -MODULE_PARM_DESC(mpt_msi_enable, " MSI Support Enable (default=0)"); + +static int mpt_msi_enable_spi; +module_param(mpt_msi_enable_spi, int, 0); +MODULE_PARM_DESC(mpt_msi_enable_spi, " Enable MSI Support for SPI \ + controllers (default=0)"); + +static int mpt_msi_enable_fc; +module_param(mpt_msi_enable_fc, int, 0); +MODULE_PARM_DESC(mpt_msi_enable_fc, " Enable MSI Support for FC \ + controllers (default=0)"); + +static int mpt_msi_enable_sas; +module_param(mpt_msi_enable_sas, int, 1); +MODULE_PARM_DESC(mpt_msi_enable_sas, " Enable MSI Support for SAS \ + controllers (default=1)"); + static int mpt_channel_mapping; module_param(mpt_channel_mapping, int, 0); @@ -91,7 +104,9 @@ static int mpt_debug_level; static int mpt_set_debug_level(const char *val, struct kernel_param *kp); module_param_call(mpt_debug_level, mpt_set_debug_level, param_get_int, &mpt_debug_level, 0600); -MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h - (default=0)"); +MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \ + - (default=0)"); + #ifdef MFCNT static int mfcounter = 0; @@ -1751,16 +1766,25 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) ioc->bus_type = SAS; } - if (mpt_msi_enable == -1) { - /* Enable on SAS, disable on FC and SPI */ - if (ioc->bus_type == SAS) - ioc->msi_enable = 1; - else - ioc->msi_enable = 0; - } else - /* follow flag: 0 - disable; 1 - enable */ - ioc->msi_enable = 
mpt_msi_enable; + switch (ioc->bus_type) { + + case SAS: + ioc->msi_enable = mpt_msi_enable_sas; + break; + + case SPI: + ioc->msi_enable = mpt_msi_enable_spi; + break; + + case FC: + ioc->msi_enable = mpt_msi_enable_fc; + break; + + default: + ioc->msi_enable = 0; + break; + } if (ioc->errata_flag_1064) pci_disable_io_access(pdev); -- cgit From 2f4c782c2e06fbaef2ac2b6b7abd796b96abd98b Mon Sep 17 00:00:00 2001 From: "Kashyap, Desai" Date: Tue, 6 Jan 2009 15:03:37 +0530 Subject: [SCSI] mpt fusion: Add Firmware debug support Signed-off-by: Kashyap Desai Signed-off-by: James Bottomley --- drivers/message/fusion/mptbase.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/message/fusion/mptbase.h | 3 +++ drivers/message/fusion/mptscsih.c | 3 +++ 3 files changed, 43 insertions(+) diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index 787a12648db..96ac88317b8 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -107,6 +107,14 @@ module_param_call(mpt_debug_level, mpt_set_debug_level, param_get_int, MODULE_PARM_DESC(mpt_debug_level, " debug level - refer to mptdebug.h \ - (default=0)"); +int mpt_fwfault_debug; +EXPORT_SYMBOL(mpt_fwfault_debug); +module_param_call(mpt_fwfault_debug, param_set_int, param_get_int, + &mpt_fwfault_debug, 0600); +MODULE_PARM_DESC(mpt_fwfault_debug, "Enable detection of Firmware fault" + " and halt Firmware on fault - (default=0)"); + + #ifdef MFCNT static int mfcounter = 0; @@ -6337,6 +6345,33 @@ mpt_print_ioc_summary(MPT_ADAPTER *ioc, char *buffer, int *size, int len, int sh *size = y; } + +/** + * mpt_halt_firmware - Halts the firmware if it is operational and panic + * the kernel + * @ioc: Pointer to MPT_ADAPTER structure + * + **/ +void +mpt_halt_firmware(MPT_ADAPTER *ioc) +{ + u32 ioc_raw_state; + + ioc_raw_state = mpt_GetIocState(ioc, 0); + + if ((ioc_raw_state & MPI_IOC_STATE_MASK) == MPI_IOC_STATE_FAULT) { + printk(MYIOC_s_ERR_FMT "IOC is in FAULT state (%04xh)!!!\n", + 
ioc->name, ioc_raw_state & MPI_DOORBELL_DATA_MASK); + panic("%s: IOC Fault (%04xh)!!!\n", ioc->name, + ioc_raw_state & MPI_DOORBELL_DATA_MASK); + } else { + CHIPREG_WRITE32(&ioc->chip->Doorbell, 0xC0FFEE00); + panic("%s: Firmware is halted due to command timeout\n", + ioc->name); + } +} +EXPORT_SYMBOL(mpt_halt_firmware); + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* * Reset Handling @@ -6369,6 +6404,8 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag) printk(MYIOC_s_INFO_FMT "HardResetHandler Entered!\n", ioc->name); printk("MF count 0x%x !\n", ioc->mfcnt); #endif + if (mpt_fwfault_debug) + mpt_halt_firmware(ioc); /* Reset the adapter. Prevent more than 1 call to * mpt_do_ioc_recovery at any instant in time. diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h index dff048cfa10..b3e981d2a50 100644 --- a/drivers/message/fusion/mptbase.h +++ b/drivers/message/fusion/mptbase.h @@ -922,11 +922,14 @@ extern void mpt_free_fw_memory(MPT_ADAPTER *ioc); extern int mpt_findImVolumes(MPT_ADAPTER *ioc); extern int mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode); extern int mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk); +extern void mpt_halt_firmware(MPT_ADAPTER *ioc); + /* * Public data decl's... */ extern struct list_head ioc_list; +extern int mpt_fwfault_debug; /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ #endif /* } __KERNEL__ */ diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index ee090413e59..e62c6bc4ad3 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -1846,6 +1846,9 @@ mptscsih_abort(struct scsi_cmnd * SCpnt) if (hd->timeouts < -1) hd->timeouts++; + if (mpt_fwfault_debug) + mpt_halt_firmware(ioc); + /* Most important! Set TaskMsgContext to SCpnt's MsgContext! 
* (the IO to be ABORT'd) * -- cgit From 2856830bd395fbc2f0c5327effe71fb025dd262d Mon Sep 17 00:00:00 2001 From: Karen Xie Date: Sat, 10 Jan 2009 19:06:07 -0800 Subject: [SCSI] iscsi_tcp: make padbuf non-static virt_to_page() call should not be used on kernel text and data addresses. virt_to_page() is used by sg_init_one(). So change padbuf to be allocated within iscsi_segment. Signed-off-by: Karen Xie Acked-by: Mike Christie Signed-off-by: James Bottomley --- drivers/scsi/libiscsi_tcp.c | 3 +-- include/scsi/libiscsi_tcp.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index a745f91d292..e7705d3532c 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -177,7 +177,6 @@ int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn, struct iscsi_segment *segment, int recv, unsigned copied) { - static unsigned char padbuf[ISCSI_PAD_LEN]; struct scatterlist sg; unsigned int pad; @@ -233,7 +232,7 @@ int iscsi_tcp_segment_done(struct iscsi_tcp_conn *tcp_conn, debug_tcp("consume %d pad bytes\n", pad); segment->total_size += pad; segment->size = pad; - segment->data = padbuf; + segment->data = segment->padbuf; return 0; } } diff --git a/include/scsi/libiscsi_tcp.h b/include/scsi/libiscsi_tcp.h index 83e32f6d785..9e3182e659d 100644 --- a/include/scsi/libiscsi_tcp.h +++ b/include/scsi/libiscsi_tcp.h @@ -39,6 +39,7 @@ struct iscsi_segment { unsigned int total_copied; struct hash_desc *hash; + unsigned char padbuf[ISCSI_PAD_LEN]; unsigned char recv_digest[ISCSI_DIGEST_SIZE]; unsigned char digest[ISCSI_DIGEST_SIZE]; unsigned int digest_len; -- cgit From debf47779efd6eace440c884c8cca2665d966eb4 Mon Sep 17 00:00:00 2001 From: "ILLES, Marton" Date: Mon, 12 Jan 2009 15:45:52 +0100 Subject: [SCSI] Add SUN Universal Xport to no attach blacklist I was using a Sun ST2510 device (iSCSI) and a special "block device" appeared which is used by SUN Common Array Manager in-band management. 
However it also appeared as a block device and caused some IO error: [ 716.868000] scsi 15:0:0:31: Direct-Access SUN Universal Xport 0735 PQ: 0 ANSI: 5 [ 716.868000] qla4xxx 0000:04:01.1: scsi(15:0:0:31): Enabled tagged queuing, queue depth 32. [ 716.868000] sd 15:0:0:31: [sdf] 40960 512-byte hardware sectors (21 MB) [ 716.868000] sd 15:0:0:31: [sdf] Write Protect is off [ 716.868000] sd 15:0:0:31: [sdf] Mode Sense: 77 00 10 08 [ 716.868000] sd 15:0:0:31: [sdf] Write cache: disabled, read cache: enabled, supports DPO and FUA [ 716.868000] sd 15:0:0:31: [sdf] 40960 512-byte hardware sectors (21 MB) [ 716.868000] sd 15:0:0:31: [sdf] Write Protect is off [ 716.868000] sd 15:0:0:31: [sdf] Mode Sense: 77 00 10 08 [ 716.872000] sd 15:0:0:31: [sdf] Write cache: disabled, read cache: enabled, supports DPO and FUA [ 716.872000] sdf: unknown partition table [ 716.932000] sd 15:0:0:31: [sdf] Attached SCSI disk [ 716.932000] sd 15:0:0:31: Attached scsi generic sg6 type 0 [ 717.412000] end_request: I/O error, dev sdf, sector 40 [ 717.412000] Buffer I/O error on device sdf, logical block 5 [ 717.412000] Buffer I/O error on device sdf, logical block 6 [ 717.412000] Buffer I/O error on device sdf, logical block 7 [ 717.412000] Buffer I/O error on device sdf, logical block 8 [ 717.412000] Buffer I/O error on device sdf, logical block 9 [ 717.412000] Buffer I/O error on device sdf, logical block 10 [ 717.412000] Buffer I/O error on device sdf, logical block 11 [ 717.412000] Buffer I/O error on device sdf, logical block 12 [ 717.412000] Buffer I/O error on device sdf, logical block 13 [ 717.412000] Buffer I/O error on device sdf, logical block 14 After some googling it appeared that similar issue has been solved for SGI/IBM devices in 4869040512082b761de2d7c35975d01044f8bfea, so here is the patch for SUN, please apply. 
Signed-off-by: James Bottomley --- drivers/scsi/scsi_devinfo.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 4969e4ec75e..099b5455bbc 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -224,6 +224,7 @@ static struct { {"SGI", "TP9100", "*", BLIST_REPORTLUN2}, {"SGI", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"IBM", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, + {"SUN", "Universal Xport", "*", BLIST_NO_ULD_ATTACH}, {"SMSC", "USB 2 HS-CF", NULL, BLIST_SPARSELUN | BLIST_INQUIRY_36}, {"SONY", "CD-ROM CDU-8001", NULL, BLIST_BORKEN}, {"SONY", "TSL", NULL, BLIST_FORCELUN}, /* DDS3 & DDS4 autoloaders */ -- cgit From c107b41c485c43f15b24743e81eaab742b3c0b67 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jan 2009 17:46:37 +0100 Subject: ALSA: hda - Use queue_delayed_work() Replaced the old schedule_work() with queue_delayed_work() where overlooked in the previous patches. Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 3c596da2b9b..fdad6ae7aad 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2815,7 +2815,7 @@ void snd_hda_power_down(struct hda_codec *codec) return; if (power_save(codec)) { codec->power_transition = 1; /* avoid reentrance */ - schedule_delayed_work(&codec->power_work, + queue_delayed_work(codec->bus->workq, &codec->power_work, msecs_to_jiffies(power_save(codec) * 1000)); } } -- cgit From 32aeef605aa01e1fee45e052eceffb00e72ba2b0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 13 Jan 2009 16:50:37 +0100 Subject: [SCSI] Skip deleted devices in __scsi_device_lookup_by_target() __scsi_device_lookup_by_target() will always return the first sdev with a matching LUN, regardless of the state. 
However, when this sdev is in SDEV_DEL scsi_device_lookup_by_target() will ignore this device and so any valid device on the list after the deleted device will never be found. So we have to modify __scsi_device_lookup_by_target() to skip any device in SDEV_DEL. Signed-off-by: Hannes Reinecke Signed-off-by: James Bottomley --- drivers/scsi/scsi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 42e72a2c1f9..cbcd3f681b6 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1095,7 +1095,8 @@ EXPORT_SYMBOL(__starget_for_each_device); * Description: Looks up the scsi_device with the specified @lun for a given * @starget. The returned scsi_device does not have an additional * reference. You must hold the host's host_lock over this call and - * any access to the returned scsi_device. + * any access to the returned scsi_device. A scsi_device in state + * SDEV_DEL is skipped. * * Note: The only reason why drivers should use this is because * they need to access the device list in irq context. Otherwise you @@ -1107,6 +1108,8 @@ struct scsi_device *__scsi_device_lookup_by_target(struct scsi_target *starget, struct scsi_device *sdev; list_for_each_entry(sdev, &starget->devices, same_target_siblings) { + if (sdev->sdev_state == SDEV_DEL) + continue; if (sdev->lun ==lun) return sdev; } -- cgit From 417bec5b0f25e000866f1be845d44a3ca0690697 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 13 Jan 2009 17:57:12 +0100 Subject: ALSA: hda - Update model descriptions in patch_sigmatel.c Update models in patch_sigmatel.c, mainly for the last Gateway updates. 
Signed-off-by: Takashi Iwai --- Documentation/sound/alsa/HD-Audio-Models.txt | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Documentation/sound/alsa/HD-Audio-Models.txt b/Documentation/sound/alsa/HD-Audio-Models.txt index 4b7ac21ea9e..64eb1100eec 100644 --- a/Documentation/sound/alsa/HD-Audio-Models.txt +++ b/Documentation/sound/alsa/HD-Audio-Models.txt @@ -275,7 +275,8 @@ STAC9200 dell-m25 Dell Inspiron E1505n dell-m26 Dell Inspiron 1501 dell-m27 Dell Inspiron E1705/9400 - gateway Gateway laptops with EAPD control + gateway-m4 Gateway laptops with EAPD control + gateway-m4-2 Gateway laptops with EAPD control panasonic Panasonic CF-74 STAC9205/9254 @@ -302,6 +303,7 @@ STAC9220/9221 macbook-pro Intel Mac Book Pro 2nd generation (eq. type 3) imac-intel Intel iMac (eq. type 2) imac-intel-20 Intel iMac (newer version) (eq. type 3) + ecs202 ECS/PC chips dell-d81 Dell (unknown) dell-d82 Dell (unknown) dell-m81 Dell (unknown) @@ -310,9 +312,13 @@ STAC9220/9221 STAC9202/9250/9251 ================== ref Reference board, base config + m1 Some Gateway MX series laptops (NX560XL) + m1-2 Some Gateway MX series laptops (MX6453) + m2 Some Gateway MX series laptops (M255) m2-2 Some Gateway MX series laptops + m3 Some Gateway MX series laptops + m5 Some Gateway MX series laptops (MP6954) m6 Some Gateway NX series laptops - pa6 Gateway NX860 series STAC9227/9228/9229/927x ======================= @@ -329,6 +335,7 @@ STAC92HD71B* dell-m4-1 Dell desktops dell-m4-2 Dell desktops dell-m4-3 Dell desktops + hp-m4 HP dv laptops STAC92HD73* =========== @@ -337,6 +344,7 @@ STAC92HD73* dell-m6-amic Dell desktops/laptops with analog mics dell-m6-dmic Dell desktops/laptops with digital mics dell-m6 Dell desktops/laptops with both type of mics + dell-eq Dell desktops/laptops STAC92HD83* =========== -- cgit From 5597b25c300eeaf43392f399990d3f3027a9b779 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Thu, 8 Jan 2009 18:11:56 -0600 Subject: powerpc/e500mc: Doorbells 
need to be taken w/exceptions disabled We use Doorbell interrupts for IPIs and thus we need to make sure we aren't interrupted in the process of processing the IPI. Signed-off-by: Kumar Gala Acked-by: Dave Liu --- arch/powerpc/kernel/head_fsl_booke.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index 2f32720a44a..36ffb3504a4 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -707,7 +707,7 @@ interrupt_base: EXCEPTION(0x2060, PerformanceMonitor, performance_monitor_exception, EXC_XFER_STD) #ifdef CONFIG_PPC_E500MC - EXCEPTION(0x2070, Doorbell, unknown_exception, EXC_XFER_EE) + EXCEPTION(0x2070, Doorbell, unknown_exception, EXC_XFER_STD) #endif /* Debug Interrupt */ -- cgit From 6c9789de2a02755101f757789ffcb17312f3fa9b Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 11 Jan 2009 18:30:13 +0300 Subject: powerpc/83xx: Make serial ports work on MPC8315E-RDB w/ FSL U-Boots FSL U-Boots use /soc8315@e0000000 node to search and fixup serial nodes' clock-frequency properties. Though in upstream kernels we use new naming convention -- for IMMR address space dts files specify /immr@e0000000 nodes. This makes FSL U-Boots fail to fixup the clock frequencies, and that leads to serial ports misbehaviour. We can workaround the issue by filling the clock frequency values manually. p.s. For the same reason FSL U-Boots fail to fixup MAC addresses for ethernet nodes, so users should either change the .dts file locally or set MAC address via `ifconfig hw ether' command. 
Signed-off-by: Anton Vorontsov Signed-off-by: Kumar Gala --- arch/powerpc/boot/dts/mpc8315erdb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/boot/dts/mpc8315erdb.dts b/arch/powerpc/boot/dts/mpc8315erdb.dts index 072c9b0f8c8..71784165b77 100644 --- a/arch/powerpc/boot/dts/mpc8315erdb.dts +++ b/arch/powerpc/boot/dts/mpc8315erdb.dts @@ -255,7 +255,7 @@ device_type = "serial"; compatible = "ns16550"; reg = <0x4500 0x100>; - clock-frequency = <0>; + clock-frequency = <133333333>; interrupts = <9 0x8>; interrupt-parent = <&ipic>; }; @@ -265,7 +265,7 @@ device_type = "serial"; compatible = "ns16550"; reg = <0x4600 0x100>; - clock-frequency = <0>; + clock-frequency = <133333333>; interrupts = <10 0x8>; interrupt-parent = <&ipic>; }; -- cgit From ea0105ea3881b409e362451690184af494bce6e3 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sun, 11 Jan 2009 19:55:39 +0300 Subject: powerpc/83xx: Move mcu_mpc8349emitx driver out of drivers/i2c/chips/ This patch is used to help Jean Delvare to get rid of drivers/i2c/chips/ directory. The new location suggested by Kumar Gala: as the driver is 83xx specific it's placed into arch/powerpc/platforms/83xx/. 
Signed-off-by: Anton Vorontsov Acked-by: Jean Delvare Signed-off-by: Kumar Gala --- arch/powerpc/platforms/83xx/Makefile | 1 + arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c | 209 +++++++++++++++++++++++++ arch/powerpc/platforms/Kconfig | 11 ++ drivers/i2c/chips/Kconfig | 11 -- drivers/i2c/chips/Makefile | 1 - drivers/i2c/chips/mcu_mpc8349emitx.c | 209 ------------------------- 6 files changed, 221 insertions(+), 221 deletions(-) create mode 100644 arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c delete mode 100644 drivers/i2c/chips/mcu_mpc8349emitx.c diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile index ba5028e2989..051777c542c 100644 --- a/arch/powerpc/platforms/83xx/Makefile +++ b/arch/powerpc/platforms/83xx/Makefile @@ -3,6 +3,7 @@ # obj-y := misc.o usb.o obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o +obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o obj-$(CONFIG_MPC831x_RDB) += mpc831x_rdb.o obj-$(CONFIG_MPC832x_RDB) += mpc832x_rdb.o obj-$(CONFIG_MPC834x_MDS) += mpc834x_mds.o diff --git a/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c new file mode 100644 index 00000000000..82a9bcb858b --- /dev/null +++ b/arch/powerpc/platforms/83xx/mcu_mpc8349emitx.c @@ -0,0 +1,209 @@ +/* + * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU + * + * Copyright (c) 2008 MontaVista Software, Inc. + * + * Author: Anton Vorontsov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * I don't have specifications for the MCU firmware, I found this register + * and bits positions by the trial&error method. 
+ */ +#define MCU_REG_CTRL 0x20 +#define MCU_CTRL_POFF 0x40 + +#define MCU_NUM_GPIO 2 + +struct mcu { + struct mutex lock; + struct device_node *np; + struct i2c_client *client; + struct of_gpio_chip of_gc; + u8 reg_ctrl; +}; + +static struct mcu *glob_mcu; + +static void mcu_power_off(void) +{ + struct mcu *mcu = glob_mcu; + + pr_info("Sending power-off request to the MCU...\n"); + mutex_lock(&mcu->lock); + i2c_smbus_write_byte_data(glob_mcu->client, MCU_REG_CTRL, + mcu->reg_ctrl | MCU_CTRL_POFF); + mutex_unlock(&mcu->lock); +} + +static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) +{ + struct of_gpio_chip *of_gc = to_of_gpio_chip(gc); + struct mcu *mcu = container_of(of_gc, struct mcu, of_gc); + u8 bit = 1 << (4 + gpio); + + mutex_lock(&mcu->lock); + if (val) + mcu->reg_ctrl &= ~bit; + else + mcu->reg_ctrl |= bit; + + i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl); + mutex_unlock(&mcu->lock); +} + +static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + mcu_gpio_set(gc, gpio, val); + return 0; +} + +static int mcu_gpiochip_add(struct mcu *mcu) +{ + struct device_node *np; + struct of_gpio_chip *of_gc = &mcu->of_gc; + struct gpio_chip *gc = &of_gc->gc; + int ret; + + np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx"); + if (!np) + return -ENODEV; + + gc->owner = THIS_MODULE; + gc->label = np->full_name; + gc->can_sleep = 1; + gc->ngpio = MCU_NUM_GPIO; + gc->base = -1; + gc->set = mcu_gpio_set; + gc->direction_output = mcu_gpio_dir_out; + of_gc->gpio_cells = 2; + of_gc->xlate = of_gpio_simple_xlate; + + np->data = of_gc; + mcu->np = np; + + /* + * We don't want to lose the node, its ->data and ->full_name... + * So, if succeeded, we don't put the node here. 
+ */ + ret = gpiochip_add(gc); + if (ret) + of_node_put(np); + return ret; +} + +static int mcu_gpiochip_remove(struct mcu *mcu) +{ + int ret; + + ret = gpiochip_remove(&mcu->of_gc.gc); + if (ret) + return ret; + of_node_put(mcu->np); + + return 0; +} + +static int __devinit mcu_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct mcu *mcu; + int ret; + + mcu = kzalloc(sizeof(*mcu), GFP_KERNEL); + if (!mcu) + return -ENOMEM; + + mutex_init(&mcu->lock); + mcu->client = client; + i2c_set_clientdata(client, mcu); + + ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); + if (ret < 0) + goto err; + mcu->reg_ctrl = ret; + + ret = mcu_gpiochip_add(mcu); + if (ret) + goto err; + + /* XXX: this is potentially racy, but there is no lock for ppc_md */ + if (!ppc_md.power_off) { + glob_mcu = mcu; + ppc_md.power_off = mcu_power_off; + dev_info(&client->dev, "will provide power-off service\n"); + } + + return 0; +err: + kfree(mcu); + return ret; +} + +static int __devexit mcu_remove(struct i2c_client *client) +{ + struct mcu *mcu = i2c_get_clientdata(client); + int ret; + + if (glob_mcu == mcu) { + ppc_md.power_off = NULL; + glob_mcu = NULL; + } + + ret = mcu_gpiochip_remove(mcu); + if (ret) + return ret; + i2c_set_clientdata(client, NULL); + kfree(mcu); + return 0; +} + +static const struct i2c_device_id mcu_ids[] = { + { "mcu-mpc8349emitx", }, + {}, +}; +MODULE_DEVICE_TABLE(i2c, mcu_ids); + +static struct i2c_driver mcu_driver = { + .driver = { + .name = "mcu-mpc8349emitx", + .owner = THIS_MODULE, + }, + .probe = mcu_probe, + .remove = __devexit_p(mcu_remove), + .id_table = mcu_ids, +}; + +static int __init mcu_init(void) +{ + return i2c_add_driver(&mcu_driver); +} +module_init(mcu_init); + +static void __exit mcu_exit(void) +{ + i2c_del_driver(&mcu_driver); +} +module_exit(mcu_exit); + +MODULE_DESCRIPTION("Power Management and GPIO expander driver for " + "MPC8349E-mITX-compatible MCU"); +MODULE_AUTHOR("Anton Vorontsov "); 
+MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 47fe2bea986..200b9cb900e 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -323,4 +323,15 @@ config SIMPLE_GPIO chip-selects, Ethernet/USB PHY's power and various other small on-board peripherals. +config MCU_MPC8349EMITX + tristate "MPC8349E-mITX MCU driver" + depends on I2C && PPC_83xx + select GENERIC_GPIO + select ARCH_REQUIRE_GPIOLIB + help + Say Y here to enable soft power-off functionality on the Freescale + boards with the MPC8349E-mITX-compatible MCU chips. This driver will + also register MCU GPIOs with the generic GPIO API, so you'll able + to use MCU pins as GPIOs. + endmenu diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig index 59c3d23f5bd..b9bef04b7be 100644 --- a/drivers/i2c/chips/Kconfig +++ b/drivers/i2c/chips/Kconfig @@ -139,15 +139,4 @@ config SENSORS_TSL2550 This driver can also be built as a module. If so, the module will be called tsl2550. -config MCU_MPC8349EMITX - tristate "MPC8349E-mITX MCU driver" - depends on I2C && PPC_83xx - select GENERIC_GPIO - select ARCH_REQUIRE_GPIOLIB - help - Say Y here to enable soft power-off functionality on the Freescale - boards with the MPC8349E-mITX-compatible MCU chips. This driver will - also register MCU GPIOs with the generic GPIO API, so you'll able - to use MCU pins as GPIOs. 
- endmenu diff --git a/drivers/i2c/chips/Makefile b/drivers/i2c/chips/Makefile index 83accaaf816..00fcb5193ac 100644 --- a/drivers/i2c/chips/Makefile +++ b/drivers/i2c/chips/Makefile @@ -19,7 +19,6 @@ obj-$(CONFIG_SENSORS_PCF8574) += pcf8574.o obj-$(CONFIG_PCF8575) += pcf8575.o obj-$(CONFIG_SENSORS_PCF8591) += pcf8591.o obj-$(CONFIG_SENSORS_TSL2550) += tsl2550.o -obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o ifeq ($(CONFIG_I2C_DEBUG_CHIP),y) EXTRA_CFLAGS += -DDEBUG diff --git a/drivers/i2c/chips/mcu_mpc8349emitx.c b/drivers/i2c/chips/mcu_mpc8349emitx.c deleted file mode 100644 index 82a9bcb858b..00000000000 --- a/drivers/i2c/chips/mcu_mpc8349emitx.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Power Management and GPIO expander driver for MPC8349E-mITX-compatible MCU - * - * Copyright (c) 2008 MontaVista Software, Inc. - * - * Author: Anton Vorontsov - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * I don't have specifications for the MCU firmware, I found this register - * and bits positions by the trial&error method. 
- */ -#define MCU_REG_CTRL 0x20 -#define MCU_CTRL_POFF 0x40 - -#define MCU_NUM_GPIO 2 - -struct mcu { - struct mutex lock; - struct device_node *np; - struct i2c_client *client; - struct of_gpio_chip of_gc; - u8 reg_ctrl; -}; - -static struct mcu *glob_mcu; - -static void mcu_power_off(void) -{ - struct mcu *mcu = glob_mcu; - - pr_info("Sending power-off request to the MCU...\n"); - mutex_lock(&mcu->lock); - i2c_smbus_write_byte_data(glob_mcu->client, MCU_REG_CTRL, - mcu->reg_ctrl | MCU_CTRL_POFF); - mutex_unlock(&mcu->lock); -} - -static void mcu_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val) -{ - struct of_gpio_chip *of_gc = to_of_gpio_chip(gc); - struct mcu *mcu = container_of(of_gc, struct mcu, of_gc); - u8 bit = 1 << (4 + gpio); - - mutex_lock(&mcu->lock); - if (val) - mcu->reg_ctrl &= ~bit; - else - mcu->reg_ctrl |= bit; - - i2c_smbus_write_byte_data(mcu->client, MCU_REG_CTRL, mcu->reg_ctrl); - mutex_unlock(&mcu->lock); -} - -static int mcu_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) -{ - mcu_gpio_set(gc, gpio, val); - return 0; -} - -static int mcu_gpiochip_add(struct mcu *mcu) -{ - struct device_node *np; - struct of_gpio_chip *of_gc = &mcu->of_gc; - struct gpio_chip *gc = &of_gc->gc; - int ret; - - np = of_find_compatible_node(NULL, NULL, "fsl,mcu-mpc8349emitx"); - if (!np) - return -ENODEV; - - gc->owner = THIS_MODULE; - gc->label = np->full_name; - gc->can_sleep = 1; - gc->ngpio = MCU_NUM_GPIO; - gc->base = -1; - gc->set = mcu_gpio_set; - gc->direction_output = mcu_gpio_dir_out; - of_gc->gpio_cells = 2; - of_gc->xlate = of_gpio_simple_xlate; - - np->data = of_gc; - mcu->np = np; - - /* - * We don't want to lose the node, its ->data and ->full_name... - * So, if succeeded, we don't put the node here. 
- */ - ret = gpiochip_add(gc); - if (ret) - of_node_put(np); - return ret; -} - -static int mcu_gpiochip_remove(struct mcu *mcu) -{ - int ret; - - ret = gpiochip_remove(&mcu->of_gc.gc); - if (ret) - return ret; - of_node_put(mcu->np); - - return 0; -} - -static int __devinit mcu_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - struct mcu *mcu; - int ret; - - mcu = kzalloc(sizeof(*mcu), GFP_KERNEL); - if (!mcu) - return -ENOMEM; - - mutex_init(&mcu->lock); - mcu->client = client; - i2c_set_clientdata(client, mcu); - - ret = i2c_smbus_read_byte_data(mcu->client, MCU_REG_CTRL); - if (ret < 0) - goto err; - mcu->reg_ctrl = ret; - - ret = mcu_gpiochip_add(mcu); - if (ret) - goto err; - - /* XXX: this is potentially racy, but there is no lock for ppc_md */ - if (!ppc_md.power_off) { - glob_mcu = mcu; - ppc_md.power_off = mcu_power_off; - dev_info(&client->dev, "will provide power-off service\n"); - } - - return 0; -err: - kfree(mcu); - return ret; -} - -static int __devexit mcu_remove(struct i2c_client *client) -{ - struct mcu *mcu = i2c_get_clientdata(client); - int ret; - - if (glob_mcu == mcu) { - ppc_md.power_off = NULL; - glob_mcu = NULL; - } - - ret = mcu_gpiochip_remove(mcu); - if (ret) - return ret; - i2c_set_clientdata(client, NULL); - kfree(mcu); - return 0; -} - -static const struct i2c_device_id mcu_ids[] = { - { "mcu-mpc8349emitx", }, - {}, -}; -MODULE_DEVICE_TABLE(i2c, mcu_ids); - -static struct i2c_driver mcu_driver = { - .driver = { - .name = "mcu-mpc8349emitx", - .owner = THIS_MODULE, - }, - .probe = mcu_probe, - .remove = __devexit_p(mcu_remove), - .id_table = mcu_ids, -}; - -static int __init mcu_init(void) -{ - return i2c_add_driver(&mcu_driver); -} -module_init(mcu_init); - -static void __exit mcu_exit(void) -{ - i2c_del_driver(&mcu_driver); -} -module_exit(mcu_exit); - -MODULE_DESCRIPTION("Power Management and GPIO expander driver for " - "MPC8349E-mITX-compatible MCU"); -MODULE_AUTHOR("Anton Vorontsov "); 
-MODULE_LICENSE("GPL"); -- cgit From 33966dd0e2f68f26943cd9ee93ec6abbc6547a8e Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Tue, 13 Jan 2009 16:04:36 -0800 Subject: tcp: splice as many packets as possible at once As spotted by Willy Tarreau, current splice() from tcp socket to pipe is not optimal. It processes at most one segment per call. This results in low performance and very high overhead due to syscall rate when splicing from interfaces which do not support LRO. Willy provided a patch inside tcp_splice_read(), but a better fix is to let tcp_read_sock() process as many segments as possible, so that tcp_rcv_space_adjust() and tcp_cleanup_rbuf() are called less often. With this change, splice() behaves like tcp_recvmsg(), being able to consume many skbs in one system call. With typical 1460 bytes of payload per frame, that means splice(SPLICE_F_NONBLOCK) can return 16*1460 = 23360 bytes. Signed-off-by: Willy Tarreau Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ce572f9dff0..48ada1b2d2c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -522,8 +522,12 @@ static int tcp_splice_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len) { struct tcp_splice_state *tss = rd_desc->arg.data; + int ret; - return skb_splice_bits(skb, offset, tss->pipe, tss->len, tss->flags); + ret = skb_splice_bits(skb, offset, tss->pipe, rd_desc->count, tss->flags); + if (ret > 0) + rd_desc->count -= ret; + return ret; } static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) @@ -531,6 +535,7 @@ static int __tcp_splice_read(struct sock *sk, struct tcp_splice_state *tss) /* Store TCP splice context information in read_descriptor_t. 
*/ read_descriptor_t rd_desc = { .arg.data = tss, + .count = tss->len, }; return tcp_read_sock(sk, &rd_desc, tcp_splice_data_recv); @@ -611,11 +616,13 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, tss.len -= ret; spliced += ret; + if (!timeo) + break; release_sock(sk); lock_sock(sk); if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || + (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current)) break; } -- cgit From 7a6046ebe28d6a5d259097dcb44162e5a6eca88e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 13 Jan 2009 17:38:24 -0800 Subject: sparc64: Fix UP build failure. sparc_ksyms_64.c includes asm/spinlock.h directly, which is a no-no. Even better, none of these exports are even necessary. All of these functions are inlines. Reported by Meelis Roos and Alexander Beregalov. Signed-off-by: David S. Miller --- arch/sparc/kernel/sparc_ksyms_64.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/sparc/kernel/sparc_ksyms_64.c b/arch/sparc/kernel/sparc_ksyms_64.c index da8f804feb4..0f26066a08d 100644 --- a/arch/sparc/kernel/sparc_ksyms_64.c +++ b/arch/sparc/kernel/sparc_ksyms_64.c @@ -9,7 +9,6 @@ #include #include -#include #include #include #include @@ -23,16 +22,6 @@ struct poll { short revents; }; -/* used by various drivers */ -#ifdef CONFIG_SMP -/* Out of line rw-locking implementation. */ -EXPORT_SYMBOL(__read_lock); -EXPORT_SYMBOL(__read_unlock); -EXPORT_SYMBOL(__write_lock); -EXPORT_SYMBOL(__write_unlock); -EXPORT_SYMBOL(__write_trylock); -#endif /* CONFIG_SMP */ - /* from helpers.S */ EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL_GPL(real_hard_smp_processor_id); -- cgit From 7891cc818967e186be68caac32d84bfd0a3f0bd2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 13 Jan 2009 22:17:51 -0800 Subject: ipv6: Fix fib6_dump_table walker leak When a fib6 table dump is prematurely ended, we won't unlink its walker from the list. 
This causes all sorts of grief for other users of the list later. Reported-by: Chris Caputo Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 29c7c99e69f..52ee1dced2f 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -298,6 +298,10 @@ static void fib6_dump_end(struct netlink_callback *cb) struct fib6_walker_t *w = (void*)cb->args[2]; if (w) { + if (cb->args[4]) { + cb->args[4] = 0; + fib6_walker_unlink(w); + } cb->args[2] = 0; kfree(w); } @@ -330,15 +334,12 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, read_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; + if (res <= 0) { + fib6_walker_unlink(w); + cb->args[4] = 0; } - fib6_walker_unlink(w); - cb->args[4] = 0; } -end: + return res; } -- cgit From 2acc9dcb609427a20463e529ef552dd93b469847 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 9 Jan 2009 16:57:14 +0100 Subject: ALSA: hda - Fix silent headphone output on Panasonic CF-74 CF-74 does the headphone/speaker switching on hardware, thus the driver shouldn't do any software-toggling of pins. Otherwise it results in a silent headphone output. This patch simply resets the hp_detect flag to fix the problem. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 0e6fc56fa37..89ff916a0ed 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4467,6 +4467,12 @@ static int patch_stac9200(struct hda_codec *codec) return err; } + /* CF-74 has no headphone detection, and the driver should *NOT* + * do detection and HP/speaker toggle because the hardware does it. 
+ */ + if (spec->board_config == STAC_9200_PANASONIC) + spec->hp_detect = 0; + codec->patch_ops = stac92xx_patch_ops; return 0; -- cgit From c15c5060fc32d7de7cde76aa61e98bae1334d82e Mon Sep 17 00:00:00 2001 From: Matthew Ranostay Date: Tue, 13 Jan 2009 13:30:07 -0500 Subject: ALSA: hda: stac92hd8xxx amp mixers Added amp nid for stac92hd8xxx families of codecs so the input amp mixer is created. Signed-off-by: Matthew Ranostay Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 89ff916a0ed..77fcc312a1f 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -336,6 +336,10 @@ static unsigned int stac92hd83xxx_pwr_mapping[4] = { 0x03, 0x0c, 0x10, 0x40, }; +static unsigned int stac92hd83xxx_amp_nids[1] = { + 0xc, +}; + static hda_nid_t stac92hd71bxx_pwr_nids[3] = { 0x0a, 0x0d, 0x0f }; @@ -4747,6 +4751,7 @@ static int patch_stac92hd83xxx(struct hda_codec *codec) spec->dmux_nids = stac92hd83xxx_dmux_nids; spec->adc_nids = stac92hd83xxx_adc_nids; spec->pwr_nids = stac92hd83xxx_pwr_nids; + spec->amp_nids = stac92hd83xxx_amp_nids; spec->pwr_mapping = stac92hd83xxx_pwr_mapping; spec->num_pwrs = ARRAY_SIZE(stac92hd83xxx_pwr_nids); spec->multiout.dac_nids = spec->dac_nids; @@ -4764,6 +4769,7 @@ static int patch_stac92hd83xxx(struct hda_codec *codec) spec->num_pins = ARRAY_SIZE(stac92hd83xxx_pin_nids); spec->num_dmuxes = ARRAY_SIZE(stac92hd83xxx_dmux_nids); spec->num_adcs = ARRAY_SIZE(stac92hd83xxx_adc_nids); + spec->num_amps = ARRAY_SIZE(stac92hd83xxx_amp_nids); spec->num_dmics = STAC92HD83XXX_NUM_DMICS; spec->dinput_mux = &stac92hd83xxx_dmux; spec->pin_nids = stac92hd83xxx_pin_nids; -- cgit From 8317e0b0c2234f5f1f5d54804e4093d11bc0dffa Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jan 2009 07:56:51 +0100 Subject: ALSA: hda - Don't reset HP pinctl in patch_sigmatel.c Resetting HP pinctl at the unplugged 
state may cause a sort of regression on some devices because of their wrong pin configuration. A simple workaround is to disable the pin reset. This is ugly and may be not good from the power-saving POV (if any), but damn simple. Signed-off-by: Takashi Iwai Cc: stable@kernel.org --- sound/pci/hda/patch_sigmatel.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 77fcc312a1f..103ac6d743e 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -4225,8 +4225,19 @@ static void stac92xx_hp_detect(struct hda_codec *codec) continue; if (presence) stac92xx_set_pinctl(codec, cfg->hp_pins[i], val); +#if 0 /* FIXME */ +/* Resetting the pinctl like below may lead to (a sort of) regressions + * on some devices since they use the HP pin actually for line/speaker + * outs although the default pin config shows a different pin (that is + * wrong and useless). + * + * So, it's basically a problem of default pin configs, likely a BIOS issue. + * But, disabling the code below just works around it, and I'm too tired of + * bug reports with such devices... 
+ */ else stac92xx_reset_pinctl(codec, cfg->hp_pins[i], val); +#endif /* FIXME */ } } -- cgit From 3e420e78ece6f9d2accc1568e80dfd0501e13df1 Mon Sep 17 00:00:00 2001 From: Luke Yelavich Date: Tue, 16 Dec 2008 12:37:47 +1100 Subject: ALSA: hda - Add automatic model setting for Samsung Q45 Have the Samsung Q45 (144d:c510) select ALC262_HIPPO by default Reference: Ubuntu bug 200210 http://launchpad.net/bugs/200210 Signed-off-by: Luke Yelavich Cc: stable@kernel.org Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ea4c88fe05c..82dd0843197 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -10573,6 +10573,7 @@ static struct snd_pci_quirk alc262_cfg_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x142d, "Fujitsu Lifebook E8410", ALC262_FUJITSU), SND_PCI_QUIRK(0x144d, 0xc032, "Samsung Q1 Ultra", ALC262_ULTRA), SND_PCI_QUIRK(0x144d, 0xc039, "Samsung Q1U EL", ALC262_ULTRA), + SND_PCI_QUIRK(0x144d, 0xc510, "Samsung Q45", ALC262_HIPPO), SND_PCI_QUIRK(0x17aa, 0x384e, "Lenovo 3000 y410", ALC262_LENOVO_3000), SND_PCI_QUIRK(0x17ff, 0x0560, "Benq ED8", ALC262_BENQ_ED8), SND_PCI_QUIRK(0x17ff, 0x058d, "Benq T31-16", ALC262_BENQ_T31), -- cgit From 9248f2693921b143b54f380b60c945b28a7a5358 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jan 2009 09:40:25 +0100 Subject: ALSA: hda - Fix stac92hd83xxx_amp_nids[] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the bug introduced in commit c15c5060fc32d7de7cde76aa61e98bae1334d82e: sound/pci/hda/patch_sigmatel.c: In function ‘patch_stac92hd83xxx’: sound/pci/hda/patch_sigmatel.c:4765: warning: assignment from incompatible pointer type Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 
103ac6d743e..25230f4c888 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -336,7 +336,7 @@ static unsigned int stac92hd83xxx_pwr_mapping[4] = { 0x03, 0x0c, 0x10, 0x40, }; -static unsigned int stac92hd83xxx_amp_nids[1] = { +static hda_nid_t stac92hd83xxx_amp_nids[1] = { 0xc, }; -- cgit From 5c6c9e6feccf771d060708fbbba9e0f67f8e0e67 Mon Sep 17 00:00:00 2001 From: "Signed-off-by: Peter Stokes" Date: Wed, 14 Jan 2009 09:47:57 +0100 Subject: ALSA: USB quirk for Logitech Quickcam Pro 9000 name The Logitech QuickCam Pro 9000 does not appear to have any product identification strings in its USB device descriptor. Therefore it receives a device name of "USB Device 0x46d:0x990". The attached patch below adds a USB quirk to provide a more friendly name. Signed-off-by: Takashi Iwai --- sound/usb/usbquirks.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sound/usb/usbquirks.h b/sound/usb/usbquirks.h index 92115755d98..5d8ef09b9dc 100644 --- a/sound/usb/usbquirks.h +++ b/sound/usb/usbquirks.h @@ -128,6 +128,14 @@ .bInterfaceClass = USB_CLASS_AUDIO, .bInterfaceSubClass = USB_SUBCLASS_AUDIO_CONTROL }, +{ + USB_DEVICE(0x046d, 0x0990), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .vendor_name = "Logitech, Inc.", + .product_name = "QuickCam Pro 9000", + .ifnum = QUIRK_NO_INTERFACE + } +}, /* * Yamaha devices -- cgit From 09b3ec7315a18d885127544204f1e389d41058d0 Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Mon, 12 Jan 2009 22:35:42 +0100 Subject: x86, tlb flush_data: replace per_cpu with an array Impact: micro-optimization, memory reduction On x86_64 flush tlb data is stored in per_cpu variables. This is unnecessary because only the first NUM_INVALIDATE_TLB_VECTORS entries are accessed. This patch aims at making the code less confusing (there's nothing really "per_cpu") by using a plain array. It also would save some memory on most distros out there (Ubuntu x86_64 has NR_CPUS=64 by default).
[ Ravikiran G Thirumalai also pointed out that the correct alignment is ____cacheline_internodealigned_in_smp, so that there's no bouncing on vsmp. ] Signed-off-by: Frederik Deweerdt Acked-by: Ravikiran Thirumalai Signed-off-by: Ingo Molnar --- arch/x86/kernel/tlb_64.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index f8be6f1d2e4..8cfea5d1451 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -33,7 +33,7 @@ * To avoid global state use 8 different call vectors. * Each CPU uses a specific vector to trigger flushes on other * CPUs. Depending on the received vector the target CPUs look into - * the right per cpu variable for the flush data. + * the right array slot for the flush data. * * With more than 8 CPUs they are hashed to the 8 available * vectors. The limited global vector space forces us to this right now. @@ -48,13 +48,13 @@ union smp_flush_state { unsigned long flush_va; spinlock_t tlbstate_lock; }; - char pad[SMP_CACHE_BYTES]; -} ____cacheline_aligned; + char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; +} ____cacheline_internodealigned_in_smp; /* State is put into the per CPU data section, but padded to a full cache line because other CPUs can access it and we don't want false sharing in the per cpu data segment. */ -static DEFINE_PER_CPU(union smp_flush_state, flush_state); +static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; /* * We cannot call mmdrop() because we are in interrupt context, @@ -129,7 +129,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) * Use that to determine where the sender put the data. 
*/ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; if (!cpu_isset(cpu, f->flush_cpumask)) goto out; @@ -169,7 +169,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, /* Caller has disabled preemption */ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; - f = &per_cpu(flush_state, sender); + f = &flush_state[sender]; /* * Could avoid this lock when @@ -205,8 +205,8 @@ static int __cpuinit init_smp_flush(void) { int i; - for_each_possible_cpu(i) - spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); + for (i = 0; i < ARRAY_SIZE(flush_state); i++) + spin_lock_init(&flush_state[i].tlbstate_lock); return 0; } -- cgit From 0a2a18b721abc960fbcada406746877d22340a60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 12 Jan 2009 23:37:16 +0100 Subject: x86: change the default cache size to 64 bytes Right now the generic cacheline size is 128 bytes - that is wasteful when structures are aligned, as all modern x86 CPUs have an (effective) cacheline size of 64 bytes. It was set to 128 bytes due to some cacheline aliasing problems on older P4 systems, but those are many years old and we don't optimize for them anymore.
(They'll still get the 128 bytes cacheline size if the kernel is specifically built for Pentium 4) Signed-off-by: Ingo Molnar Acked-by: Arjan van de Ven --- arch/x86/Kconfig.cpu | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8078955845a..cdf4a962323 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -307,10 +307,10 @@ config X86_CMPXCHG config X86_L1_CACHE_SHIFT int - default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC + default "7" if MPENTIUM4 || MPSC default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU config X86_XADD def_bool y -- cgit From 4c696ba7982501d43dea11dbbaabd2aa8a19cc42 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:53 +0100 Subject: [CVE-2009-0029] Move compat system call declarations to compat header file Move declarations to correct header file. 
Signed-off-by: Heiko Carstens --- include/linux/compat.h | 13 +++++++++++++ include/linux/syscalls.h | 12 ------------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/include/linux/compat.h b/include/linux/compat.h index e88f3ecf38b..3fd2194ff57 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -280,5 +280,18 @@ asmlinkage long compat_sys_timerfd_settime(int ufd, int flags, asmlinkage long compat_sys_timerfd_gettime(int ufd, struct compat_itimerspec __user *otmr); +asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, + __u32 __user *pages, + const int __user *nodes, + int __user *status, + int flags); +asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, + struct compat_timeval __user *t); +asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, + struct compat_stat __user *statbuf, + int flag); +asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, + int flags, int mode); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 18d0a243a7b..a7593f670ca 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -530,11 +530,6 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags); -asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_page, - __u32 __user *pages, - const int __user *nodes, - int __user *status, - int flags); asmlinkage long sys_mbind(unsigned long start, unsigned long len, unsigned long mode, unsigned long __user *nmask, @@ -583,13 +578,6 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *path, char __user *bu int bufsiz); asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags); -asmlinkage long compat_sys_futimesat(unsigned int dfd, char __user *filename, - struct compat_timeval __user *t); 
-asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename, - struct compat_stat __user *statbuf, - int flag); -asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename, - int flags, int mode); asmlinkage long sys_unshare(unsigned long unshare_flags); asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, -- cgit From 2ed7c03ec17779afb4fcfa3b8c61df61bd4879ba Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:54 +0100 Subject: [CVE-2009-0029] Convert all system calls to return a long Convert all system calls to return a long. This should be a NOP since all converted types should have the same size anyway. With the exception of sys_exit_group which returned void. But that doesn't matter since the system call doesn't return. Signed-off-by: Heiko Carstens --- fs/read_write.c | 18 +++++------ fs/xattr.c | 12 ++++---- include/linux/syscalls.h | 79 ++++++++++++++++++++++++------------------------ ipc/mqueue.c | 2 +- kernel/exit.c | 4 ++- kernel/signal.c | 2 +- kernel/timer.c | 2 +- mm/filemap.c | 2 +- mm/mmap.c | 2 +- mm/mremap.c | 2 +- mm/nommu.c | 2 +- 11 files changed, 64 insertions(+), 63 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 5cc6924eb15..940367f51f2 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -147,7 +147,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) } EXPORT_SYMBOL(vfs_llseek); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) +asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin) { off_t retval; struct file * file; @@ -369,7 +369,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) file->f_pos = pos; } -asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) +asmlinkage long sys_read(unsigned int fd, char __user * buf, size_t count) { struct file *file; ssize_t ret = -EBADF; @@ -386,7 +386,7 @@ asmlinkage ssize_t sys_read(unsigned int fd, 
char __user * buf, size_t count) return ret; } -asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) +asmlinkage long sys_write(unsigned int fd, const char __user * buf, size_t count) { struct file *file; ssize_t ret = -EBADF; @@ -403,7 +403,7 @@ asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t co return ret; } -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, +asmlinkage long sys_pread64(unsigned int fd, char __user *buf, size_t count, loff_t pos) { struct file *file; @@ -424,7 +424,7 @@ asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, return ret; } -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, +asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, size_t count, loff_t pos) { struct file *file; @@ -672,7 +672,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -asmlinkage ssize_t +asmlinkage long sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) { struct file *file; @@ -693,7 +693,7 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) return ret; } -asmlinkage ssize_t +asmlinkage long sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) { struct file *file; @@ -812,7 +812,7 @@ out: return retval; } -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) +asmlinkage long sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) { loff_t pos; off_t off; @@ -831,7 +831,7 @@ asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, siz return do_sendfile(out_fd, in_fd, NULL, count, 0); } -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) +asmlinkage long sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) { loff_t pos; ssize_t ret; diff --git a/fs/xattr.c 
b/fs/xattr.c index 237804cd6b5..d049ae27aae 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -349,7 +349,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, return error; } -asmlinkage ssize_t +asmlinkage long sys_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { @@ -364,7 +364,7 @@ sys_getxattr(const char __user *pathname, const char __user *name, return error; } -asmlinkage ssize_t +asmlinkage long sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { @@ -379,7 +379,7 @@ sys_lgetxattr(const char __user *pathname, const char __user *name, void __user return error; } -asmlinkage ssize_t +asmlinkage long sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) { struct file *f; @@ -424,7 +424,7 @@ listxattr(struct dentry *d, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_listxattr(const char __user *pathname, char __user *list, size_t size) { struct path path; @@ -438,7 +438,7 @@ sys_listxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_llistxattr(const char __user *pathname, char __user *list, size_t size) { struct path path; @@ -452,7 +452,7 @@ sys_llistxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage ssize_t +asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size) { struct file *f; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a7593f670ca..22290eeaf55 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -77,7 +77,7 @@ asmlinkage long sys_times(struct tms __user *tbuf); asmlinkage long sys_gettid(void); asmlinkage long sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp); -asmlinkage unsigned long sys_alarm(unsigned int seconds); +asmlinkage long sys_alarm(unsigned int seconds); asmlinkage 
long sys_getpid(void); asmlinkage long sys_getppid(void); asmlinkage long sys_getuid(void); @@ -166,7 +166,7 @@ asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, unsigned long flags); asmlinkage long sys_exit(int error_code); -asmlinkage void sys_exit_group(int error_code); +asmlinkage long sys_exit_group(int error_code); asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr, int options, struct rusage __user *ru); asmlinkage long sys_waitid(int which, pid_t pid, @@ -196,7 +196,7 @@ asmlinkage long sys_tkill(int pid, int sig); asmlinkage long sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo); asmlinkage long sys_sgetmask(void); asmlinkage long sys_ssetmask(int newmask); -asmlinkage unsigned long sys_signal(int sig, __sighandler_t handler); +asmlinkage long sys_signal(int sig, __sighandler_t handler); asmlinkage long sys_pause(void); asmlinkage long sys_sync(void); @@ -246,29 +246,29 @@ asmlinkage long sys_lsetxattr(const char __user *path, const char __user *name, const void __user *value, size_t size, int flags); asmlinkage long sys_fsetxattr(int fd, const char __user *name, const void __user *value, size_t size, int flags); -asmlinkage ssize_t sys_getxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_lgetxattr(const char __user *path, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_fgetxattr(int fd, const char __user *name, - void __user *value, size_t size); -asmlinkage ssize_t sys_listxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_llistxattr(const char __user *path, char __user *list, - size_t size); -asmlinkage ssize_t sys_flistxattr(int fd, char __user *list, size_t size); +asmlinkage long sys_getxattr(const char __user *path, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_lgetxattr(const char __user *path, const char __user *name, + 
void __user *value, size_t size); +asmlinkage long sys_fgetxattr(int fd, const char __user *name, + void __user *value, size_t size); +asmlinkage long sys_listxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_llistxattr(const char __user *path, char __user *list, + size_t size); +asmlinkage long sys_flistxattr(int fd, char __user *list, size_t size); asmlinkage long sys_removexattr(const char __user *path, const char __user *name); asmlinkage long sys_lremovexattr(const char __user *path, const char __user *name); asmlinkage long sys_fremovexattr(int fd, const char __user *name); -asmlinkage unsigned long sys_brk(unsigned long brk); +asmlinkage long sys_brk(unsigned long brk); asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot); -asmlinkage unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr); +asmlinkage long sys_mremap(unsigned long addr, + unsigned long old_len, unsigned long new_len, + unsigned long flags, unsigned long new_addr); asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, unsigned long prot, unsigned long pgoff, unsigned long flags); @@ -321,10 +321,10 @@ asmlinkage long sys_io_submit(aio_context_t, long, struct iocb __user * __user *); asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, struct io_event __user *result); -asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, - off_t __user *offset, size_t count); -asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, - loff_t __user *offset, size_t count); +asmlinkage long sys_sendfile(int out_fd, int in_fd, + off_t __user *offset, size_t count); +asmlinkage long sys_sendfile64(int out_fd, int in_fd, + loff_t __user *offset, size_t count); asmlinkage long sys_readlink(const char __user *path, char __user *buf, int bufsiz); asmlinkage long sys_creat(const char __user *pathname, int mode); @@ 
-368,26 +368,25 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times); asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes); -asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, - unsigned int origin); +asmlinkage long sys_lseek(unsigned int fd, off_t offset, + unsigned int origin); asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, unsigned long offset_low, loff_t __user *result, unsigned int origin); -asmlinkage ssize_t sys_read(unsigned int fd, char __user *buf, - size_t count); -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count); -asmlinkage ssize_t sys_readv(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_write(unsigned int fd, const char __user *buf, - size_t count); -asmlinkage ssize_t sys_writev(unsigned long fd, - const struct iovec __user *vec, - unsigned long vlen); -asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos); -asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, - size_t count, loff_t pos); +asmlinkage long sys_read(unsigned int fd, char __user *buf, size_t count); +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count); +asmlinkage long sys_readv(unsigned long fd, + const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_write(unsigned int fd, const char __user *buf, + size_t count); +asmlinkage long sys_writev(unsigned long fd, + const struct iovec __user *vec, + unsigned long vlen); +asmlinkage long sys_pread64(unsigned int fd, char __user *buf, + size_t count, loff_t pos); +asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, + size_t count, loff_t pos); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, int mode); asmlinkage long sys_chdir(const char __user *filename); @@ -476,7 +475,7 @@ asmlinkage long 
sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf); asmlinkage long sys_mq_open(const char __user *name, int oflag, mode_t mode, struct mq_attr __user *attr); asmlinkage long sys_mq_unlink(const char __user *name); asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *msg_ptr, size_t msg_len, unsigned int msg_prio, const struct timespec __user *abs_timeout); -asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *msg_ptr, size_t msg_len, unsigned int __user *msg_prio, const struct timespec __user *abs_timeout); asmlinkage long sys_mq_notify(mqd_t mqdes, const struct sigevent __user *notification); asmlinkage long sys_mq_getsetattr(mqd_t mqdes, const struct mq_attr __user *mqstat, struct mq_attr __user *omqstat); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 23fdb8492b8..6df028b7054 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -907,7 +907,7 @@ out: return ret; } -asmlinkage ssize_t sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, +asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, size_t msg_len, unsigned int __user *u_msg_prio, const struct timespec __user *u_abs_timeout) { diff --git a/kernel/exit.c b/kernel/exit.c index c7740fa3252..fac9b040af2 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1182,9 +1182,11 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. 
*/ -asmlinkage void sys_exit_group(int error_code) +asmlinkage long sys_exit_group(int error_code) { do_group_exit((error_code & 0xff) << 8); + /* NOTREACHED */ + return 0; } static struct pid *task_pid_type(struct task_struct *task, enum pid_type type) diff --git a/kernel/signal.c b/kernel/signal.c index 3152ac3b62e..856a5479d49 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2559,7 +2559,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage unsigned long +asmlinkage long sys_signal(int sig, __sighandler_t handler) { struct k_sigaction new_sa, old_sa; diff --git a/kernel/timer.c b/kernel/timer.c index dee3f641a7a..7b8697d7f04 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage unsigned long sys_alarm(unsigned int seconds) +asmlinkage long sys_alarm(unsigned int seconds) { return alarm_setitimer(seconds); } diff --git a/mm/filemap.c b/mm/filemap.c index ceba0bd0366..538b75ed623 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return 0; } -asmlinkage ssize_t sys_readahead(int fd, loff_t offset, size_t count) +asmlinkage long sys_readahead(int fd, loff_t offset, size_t count) { ssize_t ret; struct file *file; diff --git a/mm/mmap.c b/mm/mmap.c index 749623196cb..a970d890cb2 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -asmlinkage unsigned long sys_brk(unsigned long brk) +asmlinkage long sys_brk(unsigned long brk) { unsigned long rlim, retval; unsigned long newbrk, oldbrk; diff --git a/mm/mremap.c b/mm/mremap.c index 646de959aa5..5572e0825d8 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -420,7 +420,7 @@ out_nc: return ret; } -asmlinkage unsigned long 
sys_mremap(unsigned long addr, +asmlinkage long sys_mremap(unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { diff --git a/mm/nommu.c b/mm/nommu.c index 60ed8375c98..ee3e7892773 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -416,7 +416,7 @@ EXPORT_SYMBOL(vm_insert_page); * to a regular file. in this case, the unmapping will need * to invoke file system routines that need the global lock. */ -asmlinkage unsigned long sys_brk(unsigned long brk) +asmlinkage long sys_brk(unsigned long brk) { struct mm_struct *mm = current->mm; -- cgit From e55380edf68796d75bf41391a781c68ee678587d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:55 +0100 Subject: [CVE-2009-0029] Rename old_readdir to sys_old_readdir This way it matches the generic system call name convention. Signed-off-by: Heiko Carstens --- arch/arm/kernel/calls.S | 2 +- arch/cris/arch-v10/kernel/entry.S | 2 +- arch/cris/arch-v32/kernel/entry.S | 2 +- arch/h8300/kernel/syscalls.S | 2 +- arch/m68k/kernel/entry.S | 2 +- arch/m68knommu/kernel/syscalltable.S | 2 +- arch/mips/kernel/scall32-o32.S | 2 +- arch/mn10300/kernel/entry.S | 2 +- arch/powerpc/include/asm/systbl.h | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sh/kernel/syscalls_64.S | 2 +- arch/sparc/kernel/systbls_32.S | 2 +- arch/x86/kernel/syscall_table_32.S | 2 +- fs/readdir.c | 2 +- include/linux/syscalls.h | 2 ++ 15 files changed, 16 insertions(+), 14 deletions(-) diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 09a061cb783..9ca8d13f05f 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -98,7 +98,7 @@ CALL(sys_uselib) CALL(sys_swapon) CALL(sys_reboot) - CALL(OBSOLETE(old_readdir)) /* used by libc4 */ + CALL(OBSOLETE(sys_old_readdir)) /* used by libc4 */ /* 90 */ CALL(OBSOLETE(old_mmap)) /* used by libc4 */ CALL(sys_munmap) CALL(sys_truncate) diff --git a/arch/cris/arch-v10/kernel/entry.S b/arch/cris/arch-v10/kernel/entry.S index 
ed171d389e6..72f5cd319b9 100644 --- a/arch/cris/arch-v10/kernel/entry.S +++ b/arch/cris/arch-v10/kernel/entry.S @@ -691,7 +691,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/cris/arch-v32/kernel/entry.S b/arch/cris/arch-v32/kernel/entry.S index 7f6f93e6b70..5e674c8f7c5 100644 --- a/arch/cris/arch-v32/kernel/entry.S +++ b/arch/cris/arch-v32/kernel/entry.S @@ -614,7 +614,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/h8300/kernel/syscalls.S b/arch/h8300/kernel/syscalls.S index 54e21c3f205..4eb67faac63 100644 --- a/arch/h8300/kernel/syscalls.S +++ b/arch/h8300/kernel/syscalls.S @@ -103,7 +103,7 @@ SYMBOL_NAME_LABEL(sys_call_table) .long SYMBOL_NAME(sys_uselib) .long SYMBOL_NAME(sys_swapon) .long SYMBOL_NAME(sys_reboot) - .long SYMBOL_NAME(old_readdir) + .long SYMBOL_NAME(sys_old_readdir) .long SYMBOL_NAME(old_mmap) /* 90 */ .long SYMBOL_NAME(sys_munmap) .long SYMBOL_NAME(sys_truncate) diff --git a/arch/m68k/kernel/entry.S b/arch/m68k/kernel/entry.S index 5b780826647..5c332f2b9b8 100644 --- a/arch/m68k/kernel/entry.S +++ b/arch/m68k/kernel/entry.S @@ -513,7 +513,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 812f8d8b7a8..5c3e3f62194 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -107,7 +107,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_ni_syscall /* sys_swapon */ .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git 
a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index d0916a55cd7..51d1ba415b9 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -398,7 +398,7 @@ einval: li v0, -ENOSYS sys sys_uselib 1 sys sys_swapon 2 sys sys_reboot 3 - sys old_readdir 3 + sys sys_old_readdir 3 sys old_mmap 6 /* 4090 */ sys sys_munmap 2 sys sys_truncate 2 diff --git a/arch/mn10300/kernel/entry.S b/arch/mn10300/kernel/entry.S index 62fba8aa9b6..ceeaaaa359e 100644 --- a/arch/mn10300/kernel/entry.S +++ b/arch/mn10300/kernel/entry.S @@ -478,7 +478,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/powerpc/include/asm/systbl.h b/arch/powerpc/include/asm/systbl.h index 803def23665..72353f6070a 100644 --- a/arch/powerpc/include/asm/systbl.h +++ b/arch/powerpc/include/asm/systbl.h @@ -92,7 +92,7 @@ COMPAT_SYS_SPU(readlink) SYSCALL(uselib) SYSCALL(swapon) SYSCALL(reboot) -SYSX(sys_ni_syscall,compat_sys_old_readdir,old_readdir) +SYSX(sys_ni_syscall,compat_sys_old_readdir,sys_old_readdir) SYSCALL_SPU(mmap) SYSCALL_SPU(munmap) SYSCALL_SPU(truncate) diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index 0af693e6576..a87ce076cfa 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -105,7 +105,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/arch/sh/kernel/syscalls_64.S b/arch/sh/kernel/syscalls_64.S index 0b436aa3cad..557cb91f5ca 100644 --- a/arch/sh/kernel/syscalls_64.S +++ b/arch/sh/kernel/syscalls_64.S @@ -109,7 +109,7 @@ sys_call_table: .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git 
a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 7d080758644..8a434f51ba0 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -56,7 +56,7 @@ sys_call_table: /*185*/ .long sys_setpgid, sys_fremovexattr, sys_tkill, sys_exit_group, sys_newuname /*190*/ .long sys_init_module, sys_personality, sparc_remap_file_pages, sys_epoll_create, sys_epoll_ctl /*195*/ .long sys_epoll_wait, sys_ioprio_set, sys_getppid, sparc_sigaction, sys_sgetmask -/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, old_readdir +/*200*/ .long sys_ssetmask, sys_sigsuspend, sys_newlstat, sys_uselib, sys_old_readdir /*205*/ .long sys_readahead, sys_socketcall, sys_syslog, sys_lookup_dcookie, sys_fadvise64 /*210*/ .long sys_fadvise64_64, sys_tgkill, sys_waitpid, sys_swapoff, sys_sysinfo /*215*/ .long sys_ipc, sys_sigreturn, sys_clone, sys_ioprio_get, sys_adjtimex diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d44395ff34c..e2e86a08f31 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -88,7 +88,7 @@ ENTRY(sys_call_table) .long sys_uselib .long sys_swapon .long sys_reboot - .long old_readdir + .long sys_old_readdir .long old_mmap /* 90 */ .long sys_munmap .long sys_truncate diff --git a/fs/readdir.c b/fs/readdir.c index b318d9b5af2..8b4c2a0051a 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -102,7 +102,7 @@ efault: return -EFAULT; } -asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) +asmlinkage long sys_old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) { int error; struct file * file; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22290eeaf55..ca079c3d09e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -54,6 +54,7 @@ struct compat_stat; struct compat_timeval; struct robust_list_head; struct getcpu_cache; 
+struct old_linux_dirent; #include #include @@ -608,6 +609,7 @@ asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr); asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); +asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit From 1134723e96f6e2abcf8bfd7a2d1c96fcc323ef35 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:56 +0100 Subject: [CVE-2009-0029] Remove __attribute__((weak)) from sys_pipe/sys_pipe2 Remove __attribute__((weak)) from common code sys_pipe implementation. IA64, ALPHA, SUPERH (32bit) and SPARC (32bit) have their own implementations with the same name. Just rename them. For sys_pipe2 there is no architecture specific implementation. Cc: Richard Henderson Cc: David S. Miller Cc: Paul Mundt Cc: Tony Luck Signed-off-by: Heiko Carstens --- arch/alpha/kernel/entry.S | 8 ++++---- arch/alpha/kernel/systbls.S | 2 +- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/include/asm/unistd.h | 2 +- arch/ia64/kernel/entry.S | 2 +- arch/ia64/kernel/sys_ia64.c | 2 +- arch/s390/kernel/entry.h | 1 - arch/sh/include/asm/syscalls_32.h | 6 +++--- arch/sh/kernel/sys_sh32.c | 2 +- arch/sh/kernel/syscalls_32.S | 2 +- arch/sparc/kernel/entry.S | 4 ++-- arch/sparc/kernel/syscalls.S | 2 +- arch/sparc/kernel/systbls_32.S | 2 +- arch/sparc/kernel/systbls_64.S | 4 ++-- fs/pipe.c | 4 ++-- 15 files changed, 22 insertions(+), 23 deletions(-) diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index f77345bc66a..aa2e50cf985 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -896,9 +896,9 @@ sys_getxpid: .end sys_getxpid .align 4 - .globl sys_pipe - .ent sys_pipe -sys_pipe: + .globl sys_alpha_pipe + .ent sys_alpha_pipe +sys_alpha_pipe: lda $sp, 
-16($sp) stq $26, 0($sp) .prologue 0 @@ -916,7 +916,7 @@ sys_pipe: stq $1, 80+16($sp) 1: lda $sp, 16($sp) ret -.end sys_pipe +.end sys_alpha_pipe .align 4 .globl sys_execve diff --git a/arch/alpha/kernel/systbls.S b/arch/alpha/kernel/systbls.S index ba914af18c4..9d9e3a98bb9 100644 --- a/arch/alpha/kernel/systbls.S +++ b/arch/alpha/kernel/systbls.S @@ -52,7 +52,7 @@ sys_call_table: .quad sys_setpgid .quad alpha_ni_syscall /* 40 */ .quad sys_dup - .quad sys_pipe + .quad sys_alpha_pipe .quad osf_set_program_attributes .quad alpha_ni_syscall .quad sys_open /* 45 */ diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index a8cf1995885..a46f8395e9a 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -220,7 +220,7 @@ ia32_syscall_table: data8 sys_mkdir data8 sys_rmdir /* 40 */ data8 sys_dup - data8 sys_pipe + data8 sys_ia64_pipe data8 compat_sys_times data8 sys_ni_syscall /* old prof syscall holder */ data8 sys32_brk /* 45 */ diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index f791576355a..9015979ebe0 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -364,7 +364,7 @@ struct pt_regs; struct sigaction; long sys_execve(char __user *filename, char __user * __user *argv, char __user * __user *envp, struct pt_regs *regs); -asmlinkage long sys_pipe(void); +asmlinkage long sys_ia64_pipe(void); asmlinkage long sys_rt_sigaction(int sig, const struct sigaction __user *act, struct sigaction __user *oact, diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index d435f4a7a96..e5341e2c117 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1442,7 +1442,7 @@ sys_call_table: data8 sys_mkdir // 1055 data8 sys_rmdir data8 sys_dup - data8 sys_pipe + data8 sys_ia64_pipe data8 sys_times data8 ia64_brk // 1060 data8 sys_setgid diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index bcbb6d8792d..92ed83f3403 100644 --- 
a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -154,7 +154,7 @@ out: * and r9) as this is faster than doing a copy_to_user(). */ asmlinkage long -sys_pipe (void) +sys_ia64_pipe (void) { struct pt_regs *regs = task_pt_regs(current); int fd[2]; diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index a65afc91e8a..5c9b5b37004 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -30,7 +30,6 @@ struct fadvise64_64_args; struct old_sigaction; struct sel_arg_struct; -long sys_pipe(unsigned long __user *fildes); long sys_mmap2(struct mmap_arg_struct __user *arg); long old_mmap(struct mmap_arg_struct __user *arg); long sys_ipc(uint call, int first, unsigned long second, diff --git a/arch/sh/include/asm/syscalls_32.h b/arch/sh/include/asm/syscalls_32.h index 104c5e68610..8b30200305c 100644 --- a/arch/sh/include/asm/syscalls_32.h +++ b/arch/sh/include/asm/syscalls_32.h @@ -36,9 +36,9 @@ asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5, asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs __regs); -asmlinkage int sys_pipe(unsigned long r4, unsigned long r5, - unsigned long r6, unsigned long r7, - struct pt_regs __regs); +asmlinkage int sys_sh_pipe(unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, + struct pt_regs __regs); asmlinkage ssize_t sys_pread_wrapper(unsigned int fd, char __user *buf, size_t count, long dummy, loff_t pos); asmlinkage ssize_t sys_pwrite_wrapper(unsigned int fd, const char __user *buf, diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index dbba1e1833d..63ba12836ea 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -22,7 +22,7 @@ * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. 
*/ -asmlinkage int sys_pipe(unsigned long r4, unsigned long r5, +asmlinkage int sys_sh_pipe(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs __regs) { diff --git a/arch/sh/kernel/syscalls_32.S b/arch/sh/kernel/syscalls_32.S index a87ce076cfa..e67c1733e1b 100644 --- a/arch/sh/kernel/syscalls_32.S +++ b/arch/sh/kernel/syscalls_32.S @@ -58,7 +58,7 @@ ENTRY(sys_call_table) .long sys_mkdir .long sys_rmdir /* 40 */ .long sys_dup - .long sys_pipe + .long sys_sh_pipe .long sys_times .long sys_ni_syscall /* old prof syscall holder */ .long sys_brk /* 45 */ diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index faf9ccd9ef5..f41ecc5ac0b 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -1088,8 +1088,8 @@ sunos_execv: ld [%sp + STACKFRAME_SZ + PT_I0], %o0 .align 4 - .globl sys_pipe -sys_pipe: + .globl sys_sparc_pipe +sys_sparc_pipe: mov %o7, %l5 add %sp, STACKFRAME_SZ, %o0 ! pt_regs *regs arg call sparc_pipe diff --git a/arch/sparc/kernel/syscalls.S b/arch/sparc/kernel/syscalls.S index 7a6786a7136..87f5a3b8a25 100644 --- a/arch/sparc/kernel/syscalls.S +++ b/arch/sparc/kernel/syscalls.S @@ -20,7 +20,7 @@ execve_merge: add %sp, PTREGS_OFF, %o0 .align 32 -sys_pipe: +sys_sparc_pipe: ba,pt %xcc, sparc_pipe add %sp, PTREGS_OFF, %o0 sys_nis_syscall: diff --git a/arch/sparc/kernel/systbls_32.S b/arch/sparc/kernel/systbls_32.S index 8a434f51ba0..dccc95df0c7 100644 --- a/arch/sparc/kernel/systbls_32.S +++ b/arch/sparc/kernel/systbls_32.S @@ -24,7 +24,7 @@ sys_call_table: /*25*/ .long sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_pause /*30*/ .long sys_utime, sys_lchown, sys_fchown, sys_access, sys_nice /*35*/ .long sys_chown, sys_sync, sys_kill, sys_newstat, sys_sendfile -/*40*/ .long sys_newlstat, sys_dup, sys_pipe, sys_times, sys_getuid +/*40*/ .long sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_getuid /*45*/ .long sys_umount, sys_setgid16, sys_getgid16, sys_signal, sys_geteuid16 /*50*/ 
.long sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, sys_ioctl /*55*/ .long sys_reboot, sys_mmap2, sys_symlink, sys_readlink, sys_execve diff --git a/arch/sparc/kernel/systbls_64.S b/arch/sparc/kernel/systbls_64.S index 9fc78cf354b..e6007bb3704 100644 --- a/arch/sparc/kernel/systbls_64.S +++ b/arch/sparc/kernel/systbls_64.S @@ -26,7 +26,7 @@ sys_call_table32: /*25*/ .word sys32_vmsplice, compat_sys_ptrace, sys_alarm, sys32_sigaltstack, sys_pause /*30*/ .word compat_sys_utime, sys_lchown, sys_fchown, sys32_access, sys32_nice .word sys_chown, sys_sync, sys32_kill, compat_sys_newstat, sys32_sendfile -/*40*/ .word compat_sys_newlstat, sys_dup, sys_pipe, compat_sys_times, sys_getuid +/*40*/ .word compat_sys_newlstat, sys_dup, sys_sparc_pipe, compat_sys_times, sys_getuid .word sys32_umount, sys_setgid16, sys_getgid16, sys32_signal, sys_geteuid16 /*50*/ .word sys_getegid16, sys_acct, sys_nis_syscall, sys_getgid, compat_sys_ioctl .word sys32_reboot, sys32_mmap2, sys_symlink, sys32_readlink, sys32_execve @@ -100,7 +100,7 @@ sys_call_table: /*25*/ .word sys_vmsplice, sys_ptrace, sys_alarm, sys_sigaltstack, sys_nis_syscall /*30*/ .word sys_utime, sys_nis_syscall, sys_nis_syscall, sys_access, sys_nice .word sys_nis_syscall, sys_sync, sys_kill, sys_newstat, sys_sendfile64 -/*40*/ .word sys_newlstat, sys_dup, sys_pipe, sys_times, sys_nis_syscall +/*40*/ .word sys_newlstat, sys_dup, sys_sparc_pipe, sys_times, sys_nis_syscall .word sys_umount, sys_setgid, sys_getgid, sys_signal, sys_geteuid /*50*/ .word sys_getegid, sys_acct, sys_memory_ordering, sys_nis_syscall, sys_ioctl .word sys_reboot, sys_nis_syscall, sys_symlink, sys_readlink, sys_execve diff --git a/fs/pipe.c b/fs/pipe.c index 891697112f6..0c64db86c91 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1043,7 +1043,7 @@ int do_pipe(int *fd) * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. 
*/ -asmlinkage long __weak sys_pipe2(int __user *fildes, int flags) +asmlinkage long sys_pipe2(int __user *fildes, int flags) { int fd[2]; int error; @@ -1059,7 +1059,7 @@ asmlinkage long __weak sys_pipe2(int __user *fildes, int flags) return error; } -asmlinkage long __weak sys_pipe(int __user *fildes) +asmlinkage long sys_pipe(int __user *fildes) { return sys_pipe2(fildes, 0); } -- cgit From c9da9f2129d6a421c32e334a83770a9e67f7feac Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:57 +0100 Subject: [CVE-2009-0029] Make sys_pselect7 static Not a single architecture has wired up sys_pselect7 plus it is the only system call with seven parameters. Just make it static and rename it to do_pselect which will do the work for sys_pselect6. Signed-off-by: Heiko Carstens --- fs/compat.c | 6 +++--- fs/select.c | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/compat.c b/fs/compat.c index 30f2faa22f5..65a070e705a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1709,7 +1709,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, +static long do_compat_pselect(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask, compat_size_t sigsetsize) @@ -1775,8 +1775,8 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, (compat_size_t __user *)(sig+sizeof(up)))) return -EFAULT; } - return compat_sys_pselect7(n, inp, outp, exp, tsp, compat_ptr(up), - sigsetsize); + return do_compat_pselect(n, inp, outp, exp, tsp, compat_ptr(up), + sigsetsize); } asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, diff --git a/fs/select.c b/fs/select.c index b0cf1f0896d..d1651648be1 100644 --- a/fs/select.c +++ b/fs/select.c @@ -582,9 +582,9 @@ asmlinkage long sys_select(int n, fd_set __user *inp, 
fd_set __user *outp, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, - const sigset_t __user *sigmask, size_t sigsetsize) +static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, + fd_set __user *exp, struct timespec __user *tsp, + const sigset_t __user *sigmask, size_t sigsetsize) { sigset_t ksigmask, sigsaved; struct timespec ts, end_time, *to = NULL; @@ -650,7 +650,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, return -EFAULT; } - return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize); + return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize); } #endif /* HAVE_SET_RESTORE_SIGMASK */ -- cgit From f627a741d24f12955fa2d9f8831c3b12860635bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:58 +0100 Subject: [CVE-2009-0029] Make sys_syslog a conditional system call Remove the -ENOSYS implementation for !CONFIG_PRINTK and use the cond_syscall infrastructure instead. 
Acked-by: Kyle McMartin Signed-off-by: Heiko Carstens --- kernel/printk.c | 5 ----- kernel/sys_ni.c | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/kernel/printk.c b/kernel/printk.c index 7015733793e..e48cf33783f 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -742,11 +742,6 @@ EXPORT_SYMBOL(vprintk); #else -asmlinkage long sys_syslog(int type, char __user *buf, int len) -{ - return -ENOSYS; -} - static void call_console_drivers(unsigned start, unsigned end) { } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e14a2328170..27dad296738 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -131,6 +131,7 @@ cond_syscall(sys_io_destroy); cond_syscall(sys_io_submit); cond_syscall(sys_io_cancel); cond_syscall(sys_io_getevents); +cond_syscall(sys_syslog); /* arch-specific weak syscall entries */ cond_syscall(sys_pciconfig_read); -- cgit From 1a94bc34768e463a93cb3751819709ab0ea80a01 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:13:59 +0100 Subject: [CVE-2009-0029] System call wrapper infrastructure From: Martin Schwidefsky By selecting HAVE_SYSCALL_WRAPPERS architectures can activate system call wrappers in order to sign extend system call arguments. All architectures where the ABI defines that the caller of a function has to perform sign extension probably need this. Reported-by: Christian Borntraeger Acked-by: Ralf Baechle Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/Kconfig | 3 +++ include/linux/syscalls.h | 62 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/arch/Kconfig b/arch/Kconfig index 2e13aa26192..550dab22daa 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -62,6 +62,9 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS See Documentation/unaligned-memory-access.txt for more information on the topic of unaligned memory accesses. 
+config HAVE_SYSCALL_WRAPPERS + bool + config KRETPROBES def_bool y depends on KPROBES && HAVE_KRETPROBES diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index ca079c3d09e..0bb537d7ba2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -66,6 +66,68 @@ struct old_linux_dirent; #include #include +#define __SC_DECL1(t1, a1) t1 a1 +#define __SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) +#define __SC_DECL3(t3, a3, ...) t3 a3, __SC_DECL2(__VA_ARGS__) +#define __SC_DECL4(t4, a4, ...) t4 a4, __SC_DECL3(__VA_ARGS__) +#define __SC_DECL5(t5, a5, ...) t5 a5, __SC_DECL4(__VA_ARGS__) +#define __SC_DECL6(t6, a6, ...) t6 a6, __SC_DECL5(__VA_ARGS__) + +#define __SC_LONG1(t1, a1) long a1 +#define __SC_LONG2(t2, a2, ...) long a2, __SC_LONG1(__VA_ARGS__) +#define __SC_LONG3(t3, a3, ...) long a3, __SC_LONG2(__VA_ARGS__) +#define __SC_LONG4(t4, a4, ...) long a4, __SC_LONG3(__VA_ARGS__) +#define __SC_LONG5(t5, a5, ...) long a5, __SC_LONG4(__VA_ARGS__) +#define __SC_LONG6(t6, a6, ...) long a6, __SC_LONG5(__VA_ARGS__) + +#define __SC_CAST1(t1, a1) (t1) a1 +#define __SC_CAST2(t2, a2, ...) (t2) a2, __SC_CAST1(__VA_ARGS__) +#define __SC_CAST3(t3, a3, ...) (t3) a3, __SC_CAST2(__VA_ARGS__) +#define __SC_CAST4(t4, a4, ...) (t4) a4, __SC_CAST3(__VA_ARGS__) +#define __SC_CAST5(t5, a5, ...) (t5) a5, __SC_CAST4(__VA_ARGS__) +#define __SC_CAST6(t6, a6, ...) (t6) a6, __SC_CAST5(__VA_ARGS__) + +#define __SC_TEST(type) BUILD_BUG_ON(sizeof(type) > sizeof(long)) +#define __SC_TEST1(t1, a1) __SC_TEST(t1) +#define __SC_TEST2(t2, a2, ...) __SC_TEST(t2); __SC_TEST1(__VA_ARGS__) +#define __SC_TEST3(t3, a3, ...) __SC_TEST(t3); __SC_TEST2(__VA_ARGS__) +#define __SC_TEST4(t4, a4, ...) __SC_TEST(t4); __SC_TEST3(__VA_ARGS__) +#define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) +#define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) + +#define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) +#define SYSCALL_DEFINE1(...) 
SYSCALL_DEFINEx(1, __VA_ARGS__) +#define SYSCALL_DEFINE2(...) SYSCALL_DEFINEx(2, __VA_ARGS__) +#define SYSCALL_DEFINE3(...) SYSCALL_DEFINEx(3, __VA_ARGS__) +#define SYSCALL_DEFINE4(...) SYSCALL_DEFINEx(4, __VA_ARGS__) +#define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) +#define SYSCALL_DEFINE6(...) SYSCALL_DEFINEx(6, __VA_ARGS__) + +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) + +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + +#define SYSCALL_DEFINE(name) static inline long SYSC_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long SyS_##name(__SC_LONG##x(__VA_ARGS__)) \ + { \ + __SC_TEST##x(__VA_ARGS__); \ + return (long) SYSC_##name(__SC_CAST##x(__VA_ARGS__)); \ + } \ + SYSCALL_ALIAS(sys_##name, SyS_##name); \ + static inline long SYSC_##name(__SC_DECL##x(__VA_ARGS__)) + +#else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + +#define SYSCALL_DEFINE(name) asmlinkage long sys_##name +#define SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys_##name(__SC_DECL##x(__VA_ARGS__)) + +#endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + asmlinkage long sys_time(time_t __user *tloc); asmlinkage long sys_stime(time_t __user *tptr); asmlinkage long sys_gettimeofday(struct timeval __user *tv, -- cgit From ee6a093222549ac0c72cfd296c69fa5e7d6daa34 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 14:14:00 +0100 Subject: [CVE-2009-0029] powerpc: Enable syscall wrappers for 64-bit This enables the use of syscall wrappers to do proper sign extension for 64-bit programs. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Heiko Carstens --- arch/powerpc/Kconfig | 1 + include/linux/syscalls.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 84b861316ce..e39b73bc0ff 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -123,6 +123,7 @@ config PPC select HAVE_DMA_ATTRS if PPC64 select USE_GENERIC_SMP_HELPERS if SMP select HAVE_OPROFILE + select HAVE_SYSCALL_WRAPPERS if PPC64 config EARLY_PRINTK bool diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0bb537d7ba2..90aa5eba87a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -103,8 +103,14 @@ struct old_linux_dirent; #define SYSCALL_DEFINE5(...) SYSCALL_DEFINEx(5, __VA_ARGS__) #define SYSCALL_DEFINE6(...) SYSCALL_DEFINEx(6, __VA_ARGS__) +#ifdef CONFIG_PPC64 +#define SYSCALL_ALIAS(alias, name) \ + asm ("\t.globl " #alias "\n\t.set " #alias ", " #name "\n" \ + "\t.globl ." #alias "\n\t.set ." #alias ", ." 
#name) +#else #define SYSCALL_ALIAS(alias, name) \ asm ("\t.globl " #alias "\n\t.set " #alias ", " #name) +#endif #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS -- cgit From ed6bb6194350dc6ae97a65dbf2d621a3dbe6bbe9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:01 +0100 Subject: [CVE-2009-0029] s390: enable system call wrappers Signed-off-by: Heiko Carstens --- arch/s390/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a94a3c3ae93..6b0a3538dc6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -77,6 +77,7 @@ mainmenu "Linux Kernel Configuration" config S390 def_bool y select USE_GENERIC_SMP_HELPERS if SMP + select HAVE_SYSCALL_WRAPPERS select HAVE_FUNCTION_TRACER select HAVE_OPROFILE select HAVE_KPROBES -- cgit From 6673e0c3fbeaed2cd08e2fd4a4aa97382d6fedb0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:02 +0100 Subject: [CVE-2009-0029] System call wrapper special cases System calls with an unsigned long long argument can't be converted with the standard wrappers since that would include a cast to long, which in turn means that we would lose the upper 32 bit on 32 bit architectures. Also semctl can't use the standard wrapper since it has a 'union' parameter. So we handle them as special case and add some extra wrappers instead. Signed-off-by: Heiko Carstens --- fs/dcookies.c | 10 ++++++++-- fs/open.c | 27 ++++++++++++++++++++++++--- fs/read_write.c | 24 ++++++++++++++++++++---- fs/sync.c | 26 ++++++++++++++++++++++---- ipc/sem.c | 9 ++++++++- mm/fadvise.c | 18 ++++++++++++++++-- mm/filemap.c | 9 ++++++++- 7 files changed, 106 insertions(+), 17 deletions(-) diff --git a/fs/dcookies.c b/fs/dcookies.c index 180e9fec4ad..a21cabdbd87 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -145,7 +145,7 @@ out: /* And here is where the userspace process can look up the cookie value * to retrieve the path. 
*/ -asmlinkage long sys_lookup_dcookie(u64 cookie64, char __user * buf, size_t len) +SYSCALL_DEFINE(lookup_dcookie)(u64 cookie64, char __user * buf, size_t len) { unsigned long cookie = (unsigned long)cookie64; int err = -EINVAL; @@ -198,7 +198,13 @@ out: mutex_unlock(&dcookie_mutex); return err; } - +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_lookup_dcookie(u64 cookie64, long buf, long len) +{ + return SYSC_lookup_dcookie(cookie64, (char __user *) buf, (size_t) len); +} +SYSCALL_ALIAS(sys_lookup_dcookie, SyS_lookup_dcookie); +#endif static int dcookie_init(void) { diff --git a/fs/open.c b/fs/open.c index d882fd2351d..e349013fc79 100644 --- a/fs/open.c +++ b/fs/open.c @@ -351,21 +351,35 @@ asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) /* LFS versions of truncate are only needed on 32 bit machines */ #if BITS_PER_LONG == 32 -asmlinkage long sys_truncate64(const char __user * path, loff_t length) +SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length) { return do_sys_truncate(path, length); } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_truncate64(long path, loff_t length) +{ + return SYSC_truncate64((const char __user *) path, length); +} +SYSCALL_ALIAS(sys_truncate64, SyS_truncate64); +#endif -asmlinkage long sys_ftruncate64(unsigned int fd, loff_t length) +SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length) { long ret = do_sys_ftruncate(fd, length, 0); /* avoid REGPARM breakage on x86: */ asmlinkage_protect(2, ret, fd, length); return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_ftruncate64(long fd, loff_t length) +{ + return SYSC_ftruncate64((unsigned int) fd, length); +} +SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64); #endif +#endif /* BITS_PER_LONG == 32 */ -asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len) +SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len) { struct file *file; struct inode *inode; @@ -422,6 +436,13 @@ 
out_fput: out: return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len) +{ + return SYSC_fallocate((int)fd, (int)mode, offset, len); +} +SYSCALL_ALIAS(sys_fallocate, SyS_fallocate); +#endif /* * access() needs to use the real uid/gid, not the effective uid/gid. diff --git a/fs/read_write.c b/fs/read_write.c index 940367f51f2..7a8326bc590 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -403,8 +403,8 @@ asmlinkage long sys_write(unsigned int fd, const char __user * buf, size_t count return ret; } -asmlinkage long sys_pread64(unsigned int fd, char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, + size_t count, loff_t pos) { struct file *file; ssize_t ret = -EBADF; @@ -423,9 +423,17 @@ asmlinkage long sys_pread64(unsigned int fd, char __user *buf, return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) +{ + return SYSC_pread64((unsigned int) fd, (char __user *) buf, + (size_t) count, pos); +} +SYSCALL_ALIAS(sys_pread64, SyS_pread64); +#endif -asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, - size_t count, loff_t pos) +SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, + size_t count, loff_t pos) { struct file *file; ssize_t ret = -EBADF; @@ -444,6 +452,14 @@ asmlinkage long sys_pwrite64(unsigned int fd, const char __user *buf, return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) +{ + return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, + (size_t) count, pos); +} +SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); +#endif /* * Reduce an iovec's length in-place. 
Return the resulting number of segments diff --git a/fs/sync.c b/fs/sync.c index ac02b56548b..23ebbd72ecc 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -201,8 +201,8 @@ asmlinkage long sys_fdatasync(unsigned int fd) * already-instantiated disk blocks, there are no guarantees here that the data * will be available after a crash. */ -asmlinkage long sys_sync_file_range(int fd, loff_t offset, loff_t nbytes, - unsigned int flags) +SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes, + unsigned int flags) { int ret; struct file *file; @@ -262,14 +262,32 @@ out_put: out: return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_sync_file_range(long fd, loff_t offset, loff_t nbytes, + long flags) +{ + return SYSC_sync_file_range((int) fd, offset, nbytes, + (unsigned int) flags); +} +SYSCALL_ALIAS(sys_sync_file_range, SyS_sync_file_range); +#endif /* It would be nice if people remember that not all the world's an i386 when they introduce new system calls */ -asmlinkage long sys_sync_file_range2(int fd, unsigned int flags, - loff_t offset, loff_t nbytes) +SYSCALL_DEFINE(sync_file_range2)(int fd, unsigned int flags, + loff_t offset, loff_t nbytes) { return sys_sync_file_range(fd, offset, nbytes, flags); } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_sync_file_range2(long fd, long flags, + loff_t offset, loff_t nbytes) +{ + return SYSC_sync_file_range2((int) fd, (unsigned int) flags, + offset, nbytes); +} +SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2); +#endif /* * `endbyte' is inclusive diff --git a/ipc/sem.c b/ipc/sem.c index c68cd3f8f0c..c385c40c061 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -887,7 +887,7 @@ out_up: return err; } -asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) +SYSCALL_DEFINE(semctl)(int semid, int semnum, int cmd, union semun arg) { int err = -EINVAL; int version; @@ -923,6 +923,13 @@ asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg) return -EINVAL; 
} } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_semctl(int semid, int semnum, int cmd, union semun arg) +{ + return SYSC_semctl((int) semid, (int) semnum, (int) cmd, arg); +} +SYSCALL_ALIAS(sys_semctl, SyS_semctl); +#endif /* If the task doesn't already have a undo_list, then allocate one * here. We guarantee there is only one thread using this undo list, diff --git a/mm/fadvise.c b/mm/fadvise.c index a1da969bd98..54a0f8040af 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -24,7 +24,7 @@ * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ -asmlinkage long sys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) +SYSCALL_DEFINE(fadvise64_64)(int fd, loff_t offset, loff_t len, int advice) { struct file *file = fget(fd); struct address_space *mapping; @@ -126,12 +126,26 @@ out: fput(file); return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_fadvise64_64(long fd, loff_t offset, loff_t len, long advice) +{ + return SYSC_fadvise64_64((int) fd, offset, len, (int) advice); +} +SYSCALL_ALIAS(sys_fadvise64_64, SyS_fadvise64_64); +#endif #ifdef __ARCH_WANT_SYS_FADVISE64 -asmlinkage long sys_fadvise64(int fd, loff_t offset, size_t len, int advice) +SYSCALL_DEFINE(fadvise64)(int fd, loff_t offset, size_t len, int advice) { return sys_fadvise64_64(fd, offset, len, advice); } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_fadvise64(long fd, loff_t offset, long len, long advice) +{ + return SYSC_fadvise64((int) fd, offset, (size_t)len, (int)advice); +} +SYSCALL_ALIAS(sys_fadvise64, SyS_fadvise64); +#endif #endif diff --git a/mm/filemap.c b/mm/filemap.c index 538b75ed623..23acefe5180 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1374,7 +1374,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return 0; } -asmlinkage long sys_readahead(int fd, loff_t offset, size_t count) +SYSCALL_DEFINE(readahead)(int fd, loff_t offset, size_t count) { 
ssize_t ret; struct file *file; @@ -1393,6 +1393,13 @@ asmlinkage long sys_readahead(int fd, loff_t offset, size_t count) } return ret; } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_readahead(long fd, loff_t offset, long count) +{ + return SYSC_readahead((int) fd, offset, (size_t) count); +} +SYSCALL_ALIAS(sys_readahead, SyS_readahead); +#endif #ifdef CONFIG_MMU /** -- cgit From 58fd3aa288939d3097fa04505b25c2f5e6e144d1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:03 +0100 Subject: [CVE-2009-0029] System call wrappers part 01 Signed-off-by: Heiko Carstens --- kernel/hrtimer.c | 4 ++-- kernel/sys.c | 2 +- kernel/time.c | 14 +++++++------- kernel/timer.c | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 1455b7651b6..2dc30c59c5f 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1467,8 +1467,8 @@ out: return ret; } -asmlinkage long -sys_nanosleep(struct timespec __user *rqtp, struct timespec __user *rmtp) +SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec tu; diff --git a/kernel/sys.c b/kernel/sys.c index 763c3c17ded..37165e55233 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -919,7 +919,7 @@ void do_sys_times(struct tms *tms) tms->tms_cstime = cputime_to_clock_t(cstime); } -asmlinkage long sys_times(struct tms __user * tbuf) +SYSCALL_DEFINE1(times, struct tms __user *, tbuf) { if (tbuf) { struct tms tmp; diff --git a/kernel/time.c b/kernel/time.c index 4886e3ce83a..29511943871 100644 --- a/kernel/time.c +++ b/kernel/time.c @@ -60,7 +60,7 @@ EXPORT_SYMBOL(sys_tz); * why not move it into the appropriate arch directory (for those * architectures that need it). */ -asmlinkage long sys_time(time_t __user * tloc) +SYSCALL_DEFINE1(time, time_t __user *, tloc) { time_t i = get_seconds(); @@ -79,7 +79,7 @@ asmlinkage long sys_time(time_t __user * tloc) * architectures that need it). 
*/ -asmlinkage long sys_stime(time_t __user *tptr) +SYSCALL_DEFINE1(stime, time_t __user *, tptr) { struct timespec tv; int err; @@ -99,8 +99,8 @@ asmlinkage long sys_stime(time_t __user *tptr) #endif /* __ARCH_WANT_SYS_TIME */ -asmlinkage long sys_gettimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { if (likely(tv != NULL)) { struct timeval ktv; @@ -184,8 +184,8 @@ int do_sys_settimeofday(struct timespec *tv, struct timezone *tz) return 0; } -asmlinkage long sys_settimeofday(struct timeval __user *tv, - struct timezone __user *tz) +SYSCALL_DEFINE2(settimeofday, struct timeval __user *, tv, + struct timezone __user *, tz) { struct timeval user_tv; struct timespec new_ts; @@ -205,7 +205,7 @@ asmlinkage long sys_settimeofday(struct timeval __user *tv, return do_sys_settimeofday(tv ? &new_ts : NULL, tz ? &new_tz : NULL); } -asmlinkage long sys_adjtimex(struct timex __user *txc_p) +SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p) { struct timex txc; /* Local copy of parameter */ int ret; diff --git a/kernel/timer.c b/kernel/timer.c index 7b8697d7f04..76041df06c5 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1129,7 +1129,7 @@ void do_timer(unsigned long ticks) * For backwards compatibility? This can be done in libc so Alpha * and all newer ports shouldn't need it. */ -asmlinkage long sys_alarm(unsigned int seconds) +SYSCALL_DEFINE1(alarm, unsigned int, seconds) { return alarm_setitimer(seconds); } @@ -1152,7 +1152,7 @@ asmlinkage long sys_alarm(unsigned int seconds) * * This is SMP safe as current->tgid does not change. 
*/ -asmlinkage long sys_getpid(void) +SYSCALL_DEFINE0(getpid) { return task_tgid_vnr(current); } @@ -1308,7 +1308,7 @@ signed long __sched schedule_timeout_uninterruptible(signed long timeout) EXPORT_SYMBOL(schedule_timeout_uninterruptible); /* Thread ID - the internal kernel "pid" */ -asmlinkage long sys_gettid(void) +SYSCALL_DEFINE0(gettid) { return task_pid_vnr(current); } -- cgit From dbf040d9d1cbf1ef6250bdb095c5c118950bcde8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:04 +0100 Subject: [CVE-2009-0029] System call wrappers part 02 Signed-off-by: Heiko Carstens --- kernel/sys.c | 10 +++++----- kernel/timer.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 37165e55233..4c33555f8d9 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -756,7 +756,7 @@ error: return retval; } -asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) +SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -814,7 +814,7 @@ error: return retval; } -asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) +SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -1015,7 +1015,7 @@ out: return err; } -asmlinkage long sys_getpgid(pid_t pid) +SYSCALL_DEFINE1(getpgid, pid_t, pid) { struct task_struct *p; struct pid *grp; @@ -1045,14 +1045,14 @@ out: #ifdef __ARCH_WANT_SYS_GETPGRP -asmlinkage long sys_getpgrp(void) +SYSCALL_DEFINE0(getpgrp) { return sys_getpgid(0); } #endif -asmlinkage long sys_getsid(pid_t pid) +SYSCALL_DEFINE1(getsid, pid_t, pid) { struct task_struct *p; struct pid *sid; diff --git a/kernel/timer.c b/kernel/timer.c index 76041df06c5..14a51530a4c 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1163,7 +1163,7 @@ 
SYSCALL_DEFINE0(getpid) * value of ->real_parent under rcu_read_lock(), see * release_task()->call_rcu(delayed_put_task_struct). */ -asmlinkage long sys_getppid(void) +SYSCALL_DEFINE0(getppid) { int pid; @@ -1174,25 +1174,25 @@ asmlinkage long sys_getppid(void) return pid; } -asmlinkage long sys_getuid(void) +SYSCALL_DEFINE0(getuid) { /* Only we change this so SMP safe */ return current_uid(); } -asmlinkage long sys_geteuid(void) +SYSCALL_DEFINE0(geteuid) { /* Only we change this so SMP safe */ return current_euid(); } -asmlinkage long sys_getgid(void) +SYSCALL_DEFINE0(getgid) { /* Only we change this so SMP safe */ return current_gid(); } -asmlinkage long sys_getegid(void) +SYSCALL_DEFINE0(getegid) { /* Only we change this so SMP safe */ return current_egid(); -- cgit From ae1251ab785f6da87219df8352ffdac68bba23e4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:05 +0100 Subject: [CVE-2009-0029] System call wrappers part 03 Signed-off-by: Heiko Carstens --- kernel/sys.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernel/sys.c b/kernel/sys.c index 4c33555f8d9..ace9ced598b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -478,7 +478,7 @@ void ctrl_alt_del(void) * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ -asmlinkage long sys_setregid(gid_t rgid, gid_t egid) +SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { const struct cred *old; struct cred *new; @@ -529,7 +529,7 @@ error: * * SMP: Same implicit races as above. */ -asmlinkage long sys_setgid(gid_t gid) +SYSCALL_DEFINE1(setgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -597,7 +597,7 @@ static int set_user(struct cred *new) * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. 
*/ -asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) +SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) { const struct cred *old; struct cred *new; @@ -661,7 +661,7 @@ error: * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ -asmlinkage long sys_setuid(uid_t uid) +SYSCALL_DEFINE1(setuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -705,7 +705,7 @@ error: * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ -asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) +SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) { const struct cred *old; struct cred *new; @@ -771,7 +771,7 @@ SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __u /* * Same as above, but for rgid, egid, sgid. */ -asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) +SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) { const struct cred *old; struct cred *new; @@ -833,7 +833,7 @@ SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __u * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ -asmlinkage long sys_setfsuid(uid_t uid) +SYSCALL_DEFINE1(setfsuid, uid_t, uid) { const struct cred *old; struct cred *new; @@ -870,7 +870,7 @@ change_okay: /* * Samma på svenska.. 
*/ -asmlinkage long sys_setfsgid(gid_t gid) +SYSCALL_DEFINE1(setfsgid, gid_t, gid) { const struct cred *old; struct cred *new; @@ -1311,7 +1311,7 @@ int set_current_groups(struct group_info *group_info) EXPORT_SYMBOL(set_current_groups); -asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups, int, gidsetsize, gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; -- cgit From b290ebe2c46d01b742b948ce03f09e8a3efb9a92 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:06 +0100 Subject: [CVE-2009-0029] System call wrappers part 04 Signed-off-by: Heiko Carstens --- kernel/acct.c | 2 +- kernel/capability.c | 4 ++-- kernel/exec_domain.c | 3 +-- kernel/itimer.c | 2 +- kernel/signal.c | 7 +++---- kernel/sys.c | 6 +++--- 6 files changed, 11 insertions(+), 13 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index d57b7cbb98b..7afa3156416 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -277,7 +277,7 @@ static int acct_on(char *name) * should be written. If the filename is NULL, accounting will be * shutdown. */ -asmlinkage long sys_acct(const char __user *name) +SYSCALL_DEFINE1(acct, const char __user *, name) { int error; diff --git a/kernel/capability.c b/kernel/capability.c index 688926e496b..4e17041963f 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, * * Returns 0 on success and < 0 on error. */ -asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) +SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) { int ret = 0; pid_t pid; @@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr) * * Returns 0 on success and < 0 on error. 
*/ -asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data) +SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; unsigned i, tocopy; diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index 0511716e942..667c841c295 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void) module_init(proc_execdomains_init); #endif -asmlinkage long -sys_personality(u_long personality) +SYSCALL_DEFINE1(personality, u_long, personality) { u_long old = current->personality; diff --git a/kernel/itimer.c b/kernel/itimer.c index db7c358b9a0..7e0663ea94f 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -100,7 +100,7 @@ int do_getitimer(int which, struct itimerval *value) return 0; } -asmlinkage long sys_getitimer(int which, struct itimerval __user *value) +SYSCALL_DEFINE2(getitimer, int, which, struct itimerval __user *, value) { int error = -EFAULT; struct itimerval get_buffer; diff --git a/kernel/signal.c b/kernel/signal.c index 856a5479d49..3fe08eaa5de 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2434,8 +2434,7 @@ out: #ifdef __ARCH_WANT_SYS_SIGPENDING -asmlinkage long -sys_sigpending(old_sigset_t __user *set) +SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) { return do_sigpending(set, sizeof(*set)); } @@ -2446,8 +2445,8 @@ sys_sigpending(old_sigset_t __user *set) /* Some platforms have their own version with special arguments others support only sys_rt_sigprocmask. 
*/ -asmlinkage long -sys_sigprocmask(int how, old_sigset_t __user *set, old_sigset_t __user *oset) +SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, + old_sigset_t __user *, oset) { int error; old_sigset_t old_set, new_set; diff --git a/kernel/sys.c b/kernel/sys.c index ace9ced598b..cbe4502c28a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -944,7 +944,7 @@ SYSCALL_DEFINE1(times, struct tms __user *, tbuf) * Auch. Had to add the 'did_exec' flag to conform completely to POSIX. * LBT 04.03.94 */ -asmlinkage long sys_setpgid(pid_t pid, pid_t pgid) +SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; @@ -1080,7 +1080,7 @@ out: return retval; } -asmlinkage long sys_setsid(void) +SYSCALL_DEFINE0(setsid) { struct task_struct *group_leader = current->group_leader; struct pid *sid = task_pid(group_leader); @@ -1340,7 +1340,7 @@ out: * without another task interfering. */ -asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist) { struct group_info *group_info; int retval; -- cgit From 362e9c07c7220c0a78c88826fc0d2bf7e4a4bb68 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:07 +0100 Subject: [CVE-2009-0029] System call wrappers part 05 Signed-off-by: Heiko Carstens --- kernel/itimer.c | 5 ++--- kernel/posix-timers.c | 43 +++++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 27 deletions(-) diff --git a/kernel/itimer.c b/kernel/itimer.c index 7e0663ea94f..6a5fe93dd8b 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -260,9 +260,8 @@ unsigned int alarm_setitimer(unsigned int seconds) return it_old.it_value.tv_sec; } -asmlinkage long sys_setitimer(int which, - struct itimerval __user *value, - struct itimerval __user *ovalue) +SYSCALL_DEFINE3(setitimer, int, which, struct itimerval __user *, value, + struct itimerval __user *, ovalue) { 
struct itimerval set_buffer, get_buffer; int error; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 887c63787de..052ec4d195c 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -477,10 +477,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) /* Create a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_create(const clockid_t which_clock, - struct sigevent __user *timer_event_spec, - timer_t __user * created_timer_id) +SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, + struct sigevent __user *, timer_event_spec, + timer_t __user *, created_timer_id) { struct k_itimer *new_timer; int error, new_timer_id; @@ -661,8 +660,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting) } /* Get the time remaining on a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) +SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id, + struct itimerspec __user *, setting) { struct k_itimer *timr; struct itimerspec cur_setting; @@ -691,8 +690,7 @@ sys_timer_gettime(timer_t timer_id, struct itimerspec __user *setting) * the call back to do_schedule_next_timer(). So all we need to do is * to pick up the frozen overrun. 
*/ -asmlinkage long -sys_timer_getoverrun(timer_t timer_id) +SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id) { struct k_itimer *timr; int overrun; @@ -760,10 +758,9 @@ common_timer_set(struct k_itimer *timr, int flags, } /* Set a POSIX.1b interval timer */ -asmlinkage long -sys_timer_settime(timer_t timer_id, int flags, - const struct itimerspec __user *new_setting, - struct itimerspec __user *old_setting) +SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags, + const struct itimerspec __user *, new_setting, + struct itimerspec __user *, old_setting) { struct k_itimer *timr; struct itimerspec new_spec, old_spec; @@ -816,8 +813,7 @@ static inline int timer_delete_hook(struct k_itimer *timer) } /* Delete a POSIX.1b interval timer. */ -asmlinkage long -sys_timer_delete(timer_t timer_id) +SYSCALL_DEFINE1(timer_delete, timer_t, timer_id) { struct k_itimer *timer; unsigned long flags; @@ -903,8 +899,8 @@ int do_posix_clock_nonanosleep(const clockid_t clock, int flags, } EXPORT_SYMBOL_GPL(do_posix_clock_nonanosleep); -asmlinkage long sys_clock_settime(const clockid_t which_clock, - const struct timespec __user *tp) +SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock, + const struct timespec __user *, tp) { struct timespec new_tp; @@ -916,8 +912,8 @@ asmlinkage long sys_clock_settime(const clockid_t which_clock, return CLOCK_DISPATCH(which_clock, clock_set, (which_clock, &new_tp)); } -asmlinkage long -sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock, + struct timespec __user *,tp) { struct timespec kernel_tp; int error; @@ -933,8 +929,8 @@ sys_clock_gettime(const clockid_t which_clock, struct timespec __user *tp) } -asmlinkage long -sys_clock_getres(const clockid_t which_clock, struct timespec __user *tp) +SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock, + struct timespec __user *, tp) { struct timespec rtn_tp; int error; @@ -963,10 +959,9 @@ static int 
common_nsleep(const clockid_t which_clock, int flags, which_clock); } -asmlinkage long -sys_clock_nanosleep(const clockid_t which_clock, int flags, - const struct timespec __user *rqtp, - struct timespec __user *rmtp) +SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags, + const struct timespec __user *, rqtp, + struct timespec __user *, rmtp) { struct timespec t; -- cgit From 5add95d4f7cf08f6f62510f19576992912387501 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:08 +0100 Subject: [CVE-2009-0029] System call wrappers part 06 Signed-off-by: Heiko Carstens --- kernel/sched.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index 8be2c13b50d..1a0fdfa5ddf 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5126,7 +5126,7 @@ int can_nice(const struct task_struct *p, const int nice) * sys_setpriority is a more generic, but much slower function that * does similar things. */ -asmlinkage long sys_nice(int increment) +SYSCALL_DEFINE1(nice, int, increment) { long nice, retval; @@ -5433,8 +5433,8 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @policy: new policy. * @param: structure containing the new RT priority. */ -asmlinkage long -sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) +SYSCALL_DEFINE3(sched_setscheduler, pid_t, pid, int, policy, + struct sched_param __user *, param) { /* negative values for policy are not valid */ if (policy < 0) @@ -5448,7 +5448,7 @@ sys_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param) * @pid: the pid in question. * @param: structure containing the new RT priority. 
*/ -asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param) { return do_sched_setscheduler(pid, -1, param); } @@ -5457,7 +5457,7 @@ asmlinkage long sys_sched_setparam(pid_t pid, struct sched_param __user *param) * sys_sched_getscheduler - get the policy (scheduling class) of a thread * @pid: the pid in question. */ -asmlinkage long sys_sched_getscheduler(pid_t pid) +SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid) { struct task_struct *p; int retval; @@ -5482,7 +5482,7 @@ asmlinkage long sys_sched_getscheduler(pid_t pid) * @pid: the pid in question. * @param: structure containing the RT priority. */ -asmlinkage long sys_sched_getparam(pid_t pid, struct sched_param __user *param) +SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param) { struct sched_param lp; struct task_struct *p; @@ -5600,8 +5600,8 @@ static int get_user_cpu_mask(unsigned long __user *user_mask_ptr, unsigned len, * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to the new cpu mask */ -asmlinkage long sys_sched_setaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { cpumask_var_t new_mask; int retval; @@ -5648,8 +5648,8 @@ out_unlock: * @len: length in bytes of the bitmask pointed to by user_mask_ptr * @user_mask_ptr: user-space pointer to hold the current cpu mask */ -asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, - unsigned long __user *user_mask_ptr) +SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len, + unsigned long __user *, user_mask_ptr) { int ret; cpumask_var_t mask; @@ -5678,7 +5678,7 @@ asmlinkage long sys_sched_getaffinity(pid_t pid, unsigned int len, * This function yields the current CPU to other tasks. 
If there are no * other threads running on this CPU then this function will return. */ -asmlinkage long sys_sched_yield(void) +SYSCALL_DEFINE0(sched_yield) { struct rq *rq = this_rq_lock(); @@ -5819,7 +5819,7 @@ long __sched io_schedule_timeout(long timeout) * this syscall returns the maximum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_max(int policy) +SYSCALL_DEFINE1(sched_get_priority_max, int, policy) { int ret = -EINVAL; @@ -5844,7 +5844,7 @@ asmlinkage long sys_sched_get_priority_max(int policy) * this syscall returns the minimum rt_priority that can be used * by a given scheduling class. */ -asmlinkage long sys_sched_get_priority_min(int policy) +SYSCALL_DEFINE1(sched_get_priority_min, int, policy) { int ret = -EINVAL; -- cgit From 754fe8d297bfae7b77f7ce866e2fb0c5fb186506 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:09 +0100 Subject: [CVE-2009-0029] System call wrappers part 07 Signed-off-by: Heiko Carstens --- kernel/exit.c | 8 ++++---- kernel/kexec.c | 5 ++--- kernel/sched.c | 4 ++-- kernel/signal.c | 2 +- kernel/sys.c | 7 ++++--- net/socket.c | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index fac9b040af2..08895df0eab 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1141,7 +1141,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code) EXPORT_SYMBOL(complete_and_exit); -asmlinkage long sys_exit(int error_code) +SYSCALL_DEFINE1(exit, int, error_code) { do_exit((error_code&0xff)<<8); } @@ -1182,7 +1182,7 @@ do_group_exit(int exit_code) * wait4()-ing process will get the correct exit code - even if this * thread is not the thread group leader. 
*/ -asmlinkage long sys_exit_group(int error_code) +SYSCALL_DEFINE1(exit_group, int, error_code) { do_group_exit((error_code & 0xff) << 8); /* NOTREACHED */ @@ -1795,8 +1795,8 @@ asmlinkage long sys_waitid(int which, pid_t upid, return ret; } -asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr, - int options, struct rusage __user *ru) +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; diff --git a/kernel/kexec.c b/kernel/kexec.c index 3fb855ad6aa..8a6d7b08864 100644 --- a/kernel/kexec.c +++ b/kernel/kexec.c @@ -934,9 +934,8 @@ struct kimage *kexec_crash_image; static DEFINE_MUTEX(kexec_mutex); -asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments, - struct kexec_segment __user *segments, - unsigned long flags) +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments, + struct kexec_segment __user *, segments, unsigned long, flags) { struct kimage **dest_image, *image; int result; diff --git a/kernel/sched.c b/kernel/sched.c index 1a0fdfa5ddf..65c02037b05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,8 +5869,8 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -asmlinkage -long sys_sched_rr_get_interval(pid_t pid, struct timespec __user *interval) +SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, + struct timespec __user *, interval) { struct task_struct *p; unsigned int time_slice; diff --git a/kernel/signal.c b/kernel/signal.c index 3fe08eaa5de..41f32e08615 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,7 +1961,7 @@ EXPORT_SYMBOL(unblock_all_signals); * System call entry points. 
*/ -asmlinkage long sys_restart_syscall(void) +SYSCALL_DEFINE0(restart_syscall) { struct restart_block *restart = &current_thread_info()->restart_block; return restart->fn(restart); diff --git a/kernel/sys.c b/kernel/sys.c index cbe4502c28a..39b192b4003 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -143,7 +143,7 @@ out: return error; } -asmlinkage long sys_setpriority(int which, int who, int niceval) +SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) { struct task_struct *g, *p; struct user_struct *user; @@ -208,7 +208,7 @@ out: * has been offset by 20 (ie it returns 40..1 instead of -20..19) * to stay compatible. */ -asmlinkage long sys_getpriority(int which, int who) +SYSCALL_DEFINE2(getpriority, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; @@ -355,7 +355,8 @@ EXPORT_SYMBOL_GPL(kernel_power_off); * * reboot doesn't sync: do that yourself before calling this. */ -asmlinkage long sys_reboot(int magic1, int magic2, unsigned int cmd, void __user * arg) +SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd, + void __user *, arg) { char buffer[256]; diff --git a/net/socket.c b/net/socket.c index 06603d73c41..cc9b666e58f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1789,7 +1789,7 @@ out_put: * Shutdown a socket. 
*/ -asmlinkage long sys_shutdown(int fd, int how) +SYSCALL_DEFINE2(shutdown, int, fd, int, how) { int err, fput_needed; struct socket *sock; -- cgit From 17da2bd90abf428523de0fb98f7075e00e3ed42e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:10 +0100 Subject: [CVE-2009-0029] System call wrappers part 08 Signed-off-by: Heiko Carstens --- kernel/exit.c | 7 +++---- kernel/fork.c | 2 +- kernel/futex.c | 6 +++--- kernel/module.c | 10 ++++------ kernel/sched.c | 2 +- kernel/signal.c | 18 +++++++----------- 6 files changed, 19 insertions(+), 26 deletions(-) diff --git a/kernel/exit.c b/kernel/exit.c index 08895df0eab..f80dec3f187 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1754,9 +1754,8 @@ end: return retval; } -asmlinkage long sys_waitid(int which, pid_t upid, - struct siginfo __user *infop, int options, - struct rusage __user *ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) { struct pid *pid = NULL; enum pid_type type; @@ -1833,7 +1832,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, * sys_waitpid() remains for compatibility. waitpid() should be * implemented by calling sys_wait4() from libc.a. 
*/ -asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options) +SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) { return sys_wait4(pid, stat_addr, options, NULL); } diff --git a/kernel/fork.c b/kernel/fork.c index 1d68f1255dd..8eb37d38c6a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -901,7 +901,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p) clear_freeze_flag(p); } -asmlinkage long sys_set_tid_address(int __user *tidptr) +SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; diff --git a/kernel/futex.c b/kernel/futex.c index 002aa189eb0..e86931d8d4e 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1978,9 +1978,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, } -asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val, - struct timespec __user *utime, u32 __user *uaddr2, - u32 val3) +SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, + struct timespec __user *, utime, u32 __user *, uaddr2, + u32, val3) { struct timespec ts; ktime_t t, *tp = NULL; diff --git a/kernel/module.c b/kernel/module.c index c9332c90d5a..e8b51d41dd7 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -743,8 +743,8 @@ static void wait_for_zero_refcount(struct module *mod) mutex_lock(&module_mutex); } -asmlinkage long -sys_delete_module(const char __user *name_user, unsigned int flags) +SYSCALL_DEFINE2(delete_module, const char __user *, name_user, + unsigned int, flags) { struct module *mod; char name[MODULE_NAME_LEN]; @@ -2296,10 +2296,8 @@ static noinline struct module *load_module(void __user *umod, } /* This is where the real work happens */ -asmlinkage long -sys_init_module(void __user *umod, - unsigned long len, - const char __user *uargs) +SYSCALL_DEFINE3(init_module, void __user *, umod, + unsigned long, len, const char __user *, uargs) { struct module *mod; int ret = 0; diff --git a/kernel/sched.c b/kernel/sched.c index 
65c02037b05..eb1931eef58 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5869,7 +5869,7 @@ SYSCALL_DEFINE1(sched_get_priority_min, int, policy) * this syscall writes the default timeslice value of a given process * into the user-space timespec buffer. A value of '0' means infinity. */ -SYSCALL_DEFINE4(sched_rr_get_interval, pid_t, pid, +SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid, struct timespec __user *, interval) { struct task_struct *p; diff --git a/kernel/signal.c b/kernel/signal.c index 41f32e08615..278cc8737f1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2014,8 +2014,8 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return error; } -asmlinkage long -sys_rt_sigprocmask(int how, sigset_t __user *set, sigset_t __user *oset, size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, + sigset_t __user *, oset, size_t, sigsetsize) { int error = -EINVAL; sigset_t old_set, new_set; @@ -2074,8 +2074,7 @@ out: return error; } -asmlinkage long -sys_rt_sigpending(sigset_t __user *set, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize) { return do_sigpending(set, sigsetsize); } @@ -2146,11 +2145,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif -asmlinkage long -sys_rt_sigtimedwait(const sigset_t __user *uthese, - siginfo_t __user *uinfo, - const struct timespec __user *uts, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, + siginfo_t __user *, uinfo, const struct timespec __user *, uts, + size_t, sigsetsize) { int ret, sig; sigset_t these; @@ -2223,8 +2220,7 @@ sys_rt_sigtimedwait(const sigset_t __user *uthese, return ret; } -asmlinkage long -sys_kill(pid_t pid, int sig) +SYSCALL_DEFINE2(kill, pid_t, pid, int, sig) { struct siginfo info; -- cgit From a5f8fa9e9ba5ef3305e147f41ad6e1e84ac1f0bd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:11 +0100 Subject: [CVE-2009-0029] 
System call wrappers part 09 Signed-off-by: Heiko Carstens --- fs/sync.c | 6 +++--- kernel/signal.c | 21 ++++++++------------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/fs/sync.c b/fs/sync.c index 23ebbd72ecc..a16d53e5fe9 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -36,7 +36,7 @@ static void do_sync(unsigned long wait) laptop_sync_completion(); } -asmlinkage long sys_sync(void) +SYSCALL_DEFINE0(sync) { do_sync(1); return 0; @@ -144,12 +144,12 @@ static int do_fsync(unsigned int fd, int datasync) return ret; } -asmlinkage long sys_fsync(unsigned int fd) +SYSCALL_DEFINE1(fsync, unsigned int, fd) { return do_fsync(fd, 0); } -asmlinkage long sys_fdatasync(unsigned int fd) +SYSCALL_DEFINE1(fdatasync, unsigned int, fd) { return do_fsync(fd, 1); } diff --git a/kernel/signal.c b/kernel/signal.c index 278cc8737f1..e2333929611 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2279,7 +2279,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) * exists but it's not belonging to the target process anymore. This * method solves the problem of threads exiting and PIDs getting reused. */ -asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) +SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0 || tgid <= 0) @@ -2291,8 +2291,7 @@ asmlinkage long sys_tgkill(pid_t tgid, pid_t pid, int sig) /* * Send a signal to only one task, even if it's a CLONE_THREAD task. */ -asmlinkage long -sys_tkill(pid_t pid, int sig) +SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig) { /* This is only valid for single tasks */ if (pid <= 0) @@ -2301,8 +2300,8 @@ sys_tkill(pid_t pid, int sig) return do_tkill(0, pid, sig); } -asmlinkage long -sys_rt_sigqueueinfo(pid_t pid, int sig, siginfo_t __user *uinfo) +SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig, + siginfo_t __user *, uinfo) { siginfo_t info; @@ -2526,15 +2525,13 @@ out: /* * For backwards compatibility. Functionality superseded by sigprocmask. 
*/ -asmlinkage long -sys_sgetmask(void) +SYSCALL_DEFINE0(sgetmask) { /* SMP safe */ return current->blocked.sig[0]; } -asmlinkage long -sys_ssetmask(int newmask) +SYSCALL_DEFINE1(ssetmask, int, newmask) { int old; @@ -2554,8 +2551,7 @@ sys_ssetmask(int newmask) /* * For backwards compatibility. Functionality superseded by sigaction. */ -asmlinkage long -sys_signal(int sig, __sighandler_t handler) +SYSCALL_DEFINE2(signal, int, sig, __sighandler_t, handler) { struct k_sigaction new_sa, old_sa; int ret; @@ -2572,8 +2568,7 @@ sys_signal(int sig, __sighandler_t handler) #ifdef __ARCH_WANT_SYS_PAUSE -asmlinkage long -sys_pause(void) +SYSCALL_DEFINE0(pause) { current->state = TASK_INTERRUPTIBLE; schedule(); -- cgit From bdc480e3bef6eb0e7071770834cbdda7e30a5436 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:12 +0100 Subject: [CVE-2009-0029] System call wrappers part 10 Signed-off-by: Heiko Carstens --- fs/buffer.c | 2 +- fs/namespace.c | 9 ++++----- fs/open.c | 12 +++++------- fs/stat.c | 2 +- 4 files changed, 11 insertions(+), 14 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index b6e8b8632e2..b58208f1640 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3243,7 +3243,7 @@ void block_sync_page(struct page *page) * Use of bdflush() is deprecated and will be removed in a future kernel. * The `pdflush' kernel threads fully replace bdflush daemons and this call. */ -asmlinkage long sys_bdflush(int func, long data) +SYSCALL_DEFINE2(bdflush, int, func, long, data) { static int msg_count; diff --git a/fs/namespace.c b/fs/namespace.c index a40685d800a..3876a0fbaa6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1128,7 +1128,7 @@ static int do_umount(struct vfsmount *mnt, int flags) * unixes. 
Our API is identical to OSF/1 to avoid making a mess of AMD */ -asmlinkage long sys_umount(char __user * name, int flags) +SYSCALL_DEFINE2(umount, char __user *, name, int, flags) { struct path path; int retval; @@ -1160,7 +1160,7 @@ out: /* * The 2.0 compatible umount. No flags. */ -asmlinkage long sys_oldumount(char __user * name) +SYSCALL_DEFINE1(oldumount, char __user *, name) { return sys_umount(name, 0); } @@ -2045,9 +2045,8 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, return new_ns; } -asmlinkage long sys_mount(char __user * dev_name, char __user * dir_name, - char __user * type, unsigned long flags, - void __user * data) +SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name, + char __user *, type, unsigned long, flags, void __user *, data) { int retval; unsigned long data_page; diff --git a/fs/open.c b/fs/open.c index e349013fc79..f6c2f5673ed 100644 --- a/fs/open.c +++ b/fs/open.c @@ -122,7 +122,7 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) return 0; } -asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf) +SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct statfs __user *, buf) { struct path path; int error; @@ -138,8 +138,7 @@ asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * b return error; } - -asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf) +SYSCALL_DEFINE3(statfs64, const char __user *, pathname, size_t, sz, struct statfs64 __user *, buf) { struct path path; long error; @@ -157,8 +156,7 @@ asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct stat return error; } - -asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user * buf) +SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct statfs __user *, buf) { struct file * file; struct statfs tmp; @@ -289,7 +287,7 @@ out: return error; } -asmlinkage long sys_truncate(const 
char __user * path, unsigned long length) +SYSCALL_DEFINE2(truncate, const char __user *, path, unsigned long, length) { /* on 32-bit boxen it will cut the range 2^31--2^32-1 off */ return do_sys_truncate(path, (long)length); @@ -341,7 +339,7 @@ out: return error; } -asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length) +SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length) { long ret = do_sys_ftruncate(fd, length, 1); /* avoid REGPARM breakage on x86: */ diff --git a/fs/stat.c b/fs/stat.c index 7e12a6f8279..a1411648048 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -152,7 +152,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys_stat(char __user * filename, struct __old_kernel_stat __user * statbuf) +SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error = vfs_stat_fd(AT_FDCWD, filename, &stat); -- cgit From 257ac264d69017270fbc3cf5536953525db4076c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:13 +0100 Subject: [CVE-2009-0029] System call wrappers part 11 Signed-off-by: Heiko Carstens --- fs/open.c | 2 +- fs/stat.c | 20 ++++++++++++-------- fs/super.c | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/open.c b/fs/open.c index f6c2f5673ed..322bb60d168 100644 --- a/fs/open.c +++ b/fs/open.c @@ -174,7 +174,7 @@ out: return error; } -asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, struct statfs64 __user *buf) +SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, size_t, sz, struct statfs64 __user *, buf) { struct file * file; struct statfs64 tmp; diff --git a/fs/stat.c b/fs/stat.c index a1411648048..f29c5fe4f8b 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -162,7 +162,8 @@ SYSCALL_DEFINE2(stat, char __user *, filename, struct __old_kernel_stat __user * return error; } -asmlinkage long sys_lstat(char __user * 
filename, struct __old_kernel_stat __user * statbuf) + +SYSCALL_DEFINE2(lstat, char __user *, filename, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); @@ -172,7 +173,8 @@ asmlinkage long sys_lstat(char __user * filename, struct __old_kernel_stat __use return error; } -asmlinkage long sys_fstat(unsigned int fd, struct __old_kernel_stat __user * statbuf) + +SYSCALL_DEFINE2(fstat, unsigned int, fd, struct __old_kernel_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -235,7 +237,7 @@ static int cp_new_stat(struct kstat *stat, struct stat __user *statbuf) return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long sys_newstat(char __user *filename, struct stat __user *statbuf) +SYSCALL_DEFINE2(newstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_stat_fd(AT_FDCWD, filename, &stat); @@ -246,7 +248,7 @@ asmlinkage long sys_newstat(char __user *filename, struct stat __user *statbuf) return error; } -asmlinkage long sys_newlstat(char __user *filename, struct stat __user *statbuf) +SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_lstat_fd(AT_FDCWD, filename, &stat); @@ -280,7 +282,7 @@ out: } #endif -asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) +SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -365,7 +367,7 @@ static long cp_new_stat64(struct kstat *stat, struct stat64 __user *statbuf) return copy_to_user(statbuf,&tmp,sizeof(tmp)) ? 
-EFAULT : 0; } -asmlinkage long sys_stat64(char __user * filename, struct stat64 __user * statbuf) +SYSCALL_DEFINE2(stat64, char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_stat(filename, &stat); @@ -375,7 +377,8 @@ asmlinkage long sys_stat64(char __user * filename, struct stat64 __user * statbu return error; } -asmlinkage long sys_lstat64(char __user * filename, struct stat64 __user * statbuf) + +SYSCALL_DEFINE2(lstat64, char __user *, filename, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_lstat(filename, &stat); @@ -385,7 +388,8 @@ asmlinkage long sys_lstat64(char __user * filename, struct stat64 __user * statb return error; } -asmlinkage long sys_fstat64(unsigned long fd, struct stat64 __user * statbuf) + +SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); diff --git a/fs/super.c b/fs/super.c index ed080c41716..645e5403f2a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -544,7 +544,7 @@ rescan: return NULL; } -asmlinkage long sys_ustat(unsigned dev, struct ustat __user * ubuf) +SYSCALL_DEFINE2(ustat, unsigned, dev, struct ustat __user *, ubuf) { struct super_block *s; struct ustat tmp; -- cgit From 64fd1de3d821659ac0a3004fd5ee1de59e64af30 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:14 +0100 Subject: [CVE-2009-0029] System call wrappers part 12 Signed-off-by: Heiko Carstens --- fs/xattr.c | 46 +++++++++++++++++++++------------------------- 1 file changed, 21 insertions(+), 25 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index d049ae27aae..0367a5dae2b 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -251,9 +251,9 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, return error; } -asmlinkage long -sys_setxattr(const char __user *pathname, const char __user *name, - const void __user *value, size_t size, int flags) +SYSCALL_DEFINE5(setxattr, const char __user 
*, pathname, + const char __user *, name, const void __user *, value, + size_t, size, int, flags) { struct path path; int error; @@ -270,9 +270,9 @@ sys_setxattr(const char __user *pathname, const char __user *name, return error; } -asmlinkage long -sys_lsetxattr(const char __user *pathname, const char __user *name, - const void __user *value, size_t size, int flags) +SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, + const char __user *, name, const void __user *, value, + size_t, size, int, flags) { struct path path; int error; @@ -289,9 +289,8 @@ sys_lsetxattr(const char __user *pathname, const char __user *name, return error; } -asmlinkage long -sys_fsetxattr(int fd, const char __user *name, const void __user *value, - size_t size, int flags) +SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, + const void __user *,value, size_t, size, int, flags) { struct file *f; struct dentry *dentry; @@ -349,9 +348,8 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, return error; } -asmlinkage long -sys_getxattr(const char __user *pathname, const char __user *name, - void __user *value, size_t size) +SYSCALL_DEFINE4(getxattr, const char __user *, pathname, + const char __user *, name, void __user *, value, size_t, size) { struct path path; ssize_t error; @@ -364,9 +362,8 @@ sys_getxattr(const char __user *pathname, const char __user *name, return error; } -asmlinkage long -sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value, - size_t size) +SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, + const char __user *, name, void __user *, value, size_t, size) { struct path path; ssize_t error; @@ -379,8 +376,8 @@ sys_lgetxattr(const char __user *pathname, const char __user *name, void __user return error; } -asmlinkage long -sys_fgetxattr(int fd, const char __user *name, void __user *value, size_t size) +SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, + void __user *, value, 
size_t, size) { struct file *f; ssize_t error = -EBADF; @@ -424,8 +421,8 @@ listxattr(struct dentry *d, char __user *list, size_t size) return error; } -asmlinkage long -sys_listxattr(const char __user *pathname, char __user *list, size_t size) +SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list, + size_t, size) { struct path path; ssize_t error; @@ -438,8 +435,8 @@ sys_listxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage long -sys_llistxattr(const char __user *pathname, char __user *list, size_t size) +SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list, + size_t, size) { struct path path; ssize_t error; @@ -452,8 +449,7 @@ sys_llistxattr(const char __user *pathname, char __user *list, size_t size) return error; } -asmlinkage long -sys_flistxattr(int fd, char __user *list, size_t size) +SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { struct file *f; ssize_t error = -EBADF; @@ -485,8 +481,8 @@ removexattr(struct dentry *d, const char __user *name) return vfs_removexattr(d, kname); } -asmlinkage long -sys_removexattr(const char __user *pathname, const char __user *name) +SYSCALL_DEFINE2(removexattr, const char __user *, pathname, + const char __user *, name) { struct path path; int error; -- cgit From 6a6160a7b5c27b3c38651baef92a14fa7072b3c1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:15 +0100 Subject: [CVE-2009-0029] System call wrappers part 13 Signed-off-by: Heiko Carstens --- fs/xattr.c | 7 +++---- mm/fremap.c | 4 ++-- mm/mlock.c | 4 ++-- mm/mmap.c | 4 ++-- mm/mprotect.c | 4 ++-- mm/mremap.c | 6 +++--- mm/msync.c | 2 +- mm/nommu.c | 11 +++++------ 8 files changed, 20 insertions(+), 22 deletions(-) diff --git a/fs/xattr.c b/fs/xattr.c index 0367a5dae2b..197c4fcac03 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -499,8 +499,8 @@ SYSCALL_DEFINE2(removexattr, const char __user *, pathname, return error; } 
-asmlinkage long -sys_lremovexattr(const char __user *pathname, const char __user *name) +SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, + const char __user *, name) { struct path path; int error; @@ -517,8 +517,7 @@ sys_lremovexattr(const char __user *pathname, const char __user *name) return error; } -asmlinkage long -sys_fremovexattr(int fd, const char __user *name) +SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { struct file *f; struct dentry *dentry; diff --git a/mm/fremap.c b/mm/fremap.c index 62d5bbda921..736ba7f3306 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -120,8 +120,8 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma, * and the vma's default protection is used. Arbitrary protections * might be implemented in the future. */ -asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size, - unsigned long prot, unsigned long pgoff, unsigned long flags) +SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, + unsigned long, prot, unsigned long, pgoff, unsigned long, flags) { struct mm_struct *mm = current->mm; struct address_space *mapping; diff --git a/mm/mlock.c b/mm/mlock.c index e125156c664..04d5e7429c5 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -530,7 +530,7 @@ static int do_mlock(unsigned long start, size_t len, int on) return error; } -asmlinkage long sys_mlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) { unsigned long locked; unsigned long lock_limit; @@ -558,7 +558,7 @@ asmlinkage long sys_mlock(unsigned long start, size_t len) return error; } -asmlinkage long sys_munlock(unsigned long start, size_t len) +SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) { int ret; diff --git a/mm/mmap.c b/mm/mmap.c index a970d890cb2..8d95902e9a3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -245,7 +245,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -asmlinkage long 
sys_brk(unsigned long brk) +SYSCALL_DEFINE1(brk, unsigned long, brk) { unsigned long rlim, retval; unsigned long newbrk, oldbrk; @@ -1948,7 +1948,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) EXPORT_SYMBOL(do_munmap); -asmlinkage long sys_munmap(unsigned long addr, size_t len) +SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { int ret; struct mm_struct *mm = current->mm; diff --git a/mm/mprotect.c b/mm/mprotect.c index d0f6e7ce09f..abe2694e13f 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -217,8 +217,8 @@ fail: return error; } -asmlinkage long -sys_mprotect(unsigned long start, size_t len, unsigned long prot) +SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, + unsigned long, prot) { unsigned long vm_flags, nstart, end, tmp, reqprot; struct vm_area_struct *vma, *prev; diff --git a/mm/mremap.c b/mm/mremap.c index 5572e0825d8..a39b7b91be4 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -420,9 +420,9 @@ out_nc: return ret; } -asmlinkage long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) +SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + unsigned long, new_len, unsigned long, flags, + unsigned long, new_addr) { unsigned long ret; diff --git a/mm/msync.c b/mm/msync.c index 07dae08cf31..4083209b7f0 100644 --- a/mm/msync.c +++ b/mm/msync.c @@ -28,7 +28,7 @@ * So by _not_ starting I/O in MS_ASYNC we provide complete flexibility to * applications. */ -asmlinkage long sys_msync(unsigned long start, size_t len, int flags) +SYSCALL_DEFINE3(msync, unsigned long, start, size_t, len, int, flags) { unsigned long end; struct mm_struct *mm = current->mm; diff --git a/mm/nommu.c b/mm/nommu.c index ee3e7892773..8cee8c8ff0f 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -416,7 +416,7 @@ EXPORT_SYMBOL(vm_insert_page); * to a regular file. in this case, the unmapping will need * to invoke file system routines that need the global lock. 
*/ -asmlinkage long sys_brk(unsigned long brk) +SYSCALL_DEFINE1(brk, unsigned long, brk) { struct mm_struct *mm = current->mm; @@ -1573,7 +1573,7 @@ erase_whole_vma: } EXPORT_SYMBOL(do_munmap); -asmlinkage long sys_munmap(unsigned long addr, size_t len) +SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len) { int ret; struct mm_struct *mm = current->mm; @@ -1657,10 +1657,9 @@ unsigned long do_mremap(unsigned long addr, } EXPORT_SYMBOL(do_mremap); -asmlinkage -unsigned long sys_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) +SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + unsigned long, new_len, unsigned long, flags, + unsigned long, new_addr) { unsigned long ret; -- cgit From 3480b25743cb7404928d57efeaa3d085708b04c2 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:16 +0100 Subject: [CVE-2009-0029] System call wrappers part 14 Signed-off-by: Heiko Carstens --- fs/namei.c | 8 ++++---- fs/namespace.c | 4 ++-- fs/open.c | 2 +- mm/madvise.c | 2 +- mm/mincore.c | 4 ++-- mm/mlock.c | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index f05bed24242..43fa2525972 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2017,7 +2017,7 @@ out_unlock: return error; } -asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev) +SYSCALL_DEFINE3(mknod, const char __user *, filename, int, mode, unsigned, dev) { return sys_mknodat(AT_FDCWD, filename, mode, dev); } @@ -2302,7 +2302,7 @@ asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) return do_unlinkat(dfd, pathname); } -asmlinkage long sys_unlink(const char __user *pathname) +SYSCALL_DEFINE1(unlink, const char __user *, pathname) { return do_unlinkat(AT_FDCWD, pathname); } @@ -2370,7 +2370,7 @@ out_putname: return error; } -asmlinkage long sys_symlink(const char __user *oldname, const char __user *newname) 
+SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname) { return sys_symlinkat(oldname, AT_FDCWD, newname); } @@ -2473,7 +2473,7 @@ out: return error; } -asmlinkage long sys_link(const char __user *oldname, const char __user *newname) +SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname) { return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); } diff --git a/fs/namespace.c b/fs/namespace.c index 3876a0fbaa6..228d8c4bfd1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2171,8 +2171,8 @@ static void chroot_fs_refs(struct path *old_root, struct path *new_root) * though, so you may need to say mount --bind /nfs/my_root /nfs/my_root * first. */ -asmlinkage long sys_pivot_root(const char __user * new_root, - const char __user * put_old) +SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, + const char __user *, put_old) { struct vfsmount *tmp; struct path new, old, parent_path, root_parent, root; diff --git a/fs/open.c b/fs/open.c index 322bb60d168..9b926de6ed9 100644 --- a/fs/open.c +++ b/fs/open.c @@ -569,7 +569,7 @@ out: return error; } -asmlinkage long sys_chroot(const char __user * filename) +SYSCALL_DEFINE1(chroot, const char __user *, filename) { struct path path; int error; diff --git a/mm/madvise.c b/mm/madvise.c index f9349c18a1b..b9ce574827c 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -281,7 +281,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, * -EBADF - map exists, but area maps something that isn't a file. * -EAGAIN - a kernel resource was temporarily unavailable. 
*/ -asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior) +SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior) { unsigned long end, tmp; struct vm_area_struct * vma, *prev; diff --git a/mm/mincore.c b/mm/mincore.c index 5178800bc12..8cb508f84ea 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -177,8 +177,8 @@ none_mapped: * mapped * -EAGAIN - A kernel resource was temporarily unavailable. */ -asmlinkage long sys_mincore(unsigned long start, size_t len, - unsigned char __user * vec) +SYSCALL_DEFINE3(mincore, unsigned long, start, size_t, len, + unsigned char __user *, vec) { long retval; unsigned long pages; diff --git a/mm/mlock.c b/mm/mlock.c index 04d5e7429c5..2904a347e47 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -595,7 +595,7 @@ out: return 0; } -asmlinkage long sys_mlockall(int flags) +SYSCALL_DEFINE1(mlockall, int, flags) { unsigned long lock_limit; int ret = -EINVAL; @@ -623,7 +623,7 @@ out: return ret; } -asmlinkage long sys_munlockall(void) +SYSCALL_DEFINE0(munlockall) { int ret; -- cgit From a26eab2400f0477bfac0255600552394855016f7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:17 +0100 Subject: [CVE-2009-0029] System call wrappers part 15 Signed-off-by: Heiko Carstens --- fs/fcntl.c | 11 ++++++----- fs/ioctl.c | 2 +- fs/namei.c | 2 +- fs/open.c | 4 ++-- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/fcntl.c b/fs/fcntl.c index cdc14194672..bd215cc791d 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -50,7 +50,7 @@ static int get_close_on_exec(unsigned int fd) return res; } -asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) +SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags) { int err = -EBADF; struct file * file, *tofree; @@ -113,7 +113,7 @@ out_unlock: return err; } -asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) +SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd) { if 
(unlikely(newfd == oldfd)) { /* corner case */ struct files_struct *files = current->files; @@ -126,7 +126,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) return sys_dup3(oldfd, newfd, 0); } -asmlinkage long sys_dup(unsigned int fildes) +SYSCALL_DEFINE1(dup, unsigned int, fildes) { int ret = -EBADF; struct file *file = fget(fildes); @@ -335,7 +335,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, return err; } -asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; long err = -EBADF; @@ -358,7 +358,8 @@ out: } #if BITS_PER_LONG == 32 -asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + unsigned long, arg) { struct file * filp; long err; diff --git a/fs/ioctl.c b/fs/ioctl.c index 20b0a8a24c6..240ec63984c 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -542,7 +542,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, return error; } -asmlinkage long sys_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) +SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; int error = -EBADF; diff --git a/fs/namei.c b/fs/namei.c index 43fa2525972..00c4f37a039 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2718,7 +2718,7 @@ exit: return error; } -asmlinkage long sys_rename(const char __user *oldname, const char __user *newname) +SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) { return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); } diff --git a/fs/open.c b/fs/open.c index 9b926de6ed9..ecc75a2c262 100644 --- a/fs/open.c +++ b/fs/open.c @@ -594,7 +594,7 @@ out: return error; } -asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) +SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode) { struct inode * inode; 
struct dentry * dentry; @@ -658,7 +658,7 @@ out: return error; } -asmlinkage long sys_chmod(const char __user *filename, mode_t mode) +SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode) { return sys_fchmodat(AT_FDCWD, filename, mode); } -- cgit From 002c8976ee537724b20a5e179d9b349309438836 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:18 +0100 Subject: [CVE-2009-0029] System call wrappers part 16 Signed-off-by: Heiko Carstens --- fs/aio.c | 22 +++++++++++----------- fs/locks.c | 2 +- fs/open.c | 2 +- fs/read_write.c | 4 ++-- fs/stat.c | 4 ++-- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index d6f89d3c15e..8fa77e23394 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1270,7 +1270,7 @@ static void io_destroy(struct kioctx *ioctx) * pointer is passed for ctxp. Will fail with -ENOSYS if not * implemented. */ -asmlinkage long sys_io_setup(unsigned nr_events, aio_context_t __user *ctxp) +SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) { struct kioctx *ioctx = NULL; unsigned long ctx; @@ -1308,7 +1308,7 @@ out: * implemented. May fail with -EFAULT if the context pointed to * is invalid. */ -asmlinkage long sys_io_destroy(aio_context_t ctx) +SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { @@ -1662,8 +1662,8 @@ out_put_req: * are available to queue any iocbs. Will return 0 if nr is 0. Will * fail with -ENOSYS if not implemented. */ -asmlinkage long sys_io_submit(aio_context_t ctx_id, long nr, - struct iocb __user * __user *iocbpp) +SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr, + struct iocb __user * __user *, iocbpp) { struct kioctx *ctx; long ret = 0; @@ -1737,8 +1737,8 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, * invalid. May fail with -EAGAIN if the iocb specified was not * cancelled. Will fail with -ENOSYS if not implemented. 
*/ -asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, - struct io_event __user *result) +SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, + struct io_event __user *, result) { int (*cancel)(struct kiocb *iocb, struct io_event *res); struct kioctx *ctx; @@ -1799,11 +1799,11 @@ asmlinkage long sys_io_cancel(aio_context_t ctx_id, struct iocb __user *iocb, * will be updated if not NULL and the operation blocks. Will fail * with -ENOSYS if not implemented. */ -asmlinkage long sys_io_getevents(aio_context_t ctx_id, - long min_nr, - long nr, - struct io_event __user *events, - struct timespec __user *timeout) +SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, + long, min_nr, + long, nr, + struct io_event __user *, events, + struct timespec __user *, timeout) { struct kioctx *ioctx = lookup_ioctx(ctx_id); long ret = -EINVAL; diff --git a/fs/locks.c b/fs/locks.c index 46a2e12f7d4..ec3deea29e3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1564,7 +1564,7 @@ EXPORT_SYMBOL(flock_lock_file_wait); * %LOCK_MAND can be combined with %LOCK_READ or %LOCK_WRITE to allow other * processes read and write access respectively. */ -asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) +SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { struct file *filp; struct file_lock *lock; diff --git a/fs/open.c b/fs/open.c index ecc75a2c262..293408b1c16 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1081,7 +1081,7 @@ asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, * For backward compatibility? Maybe this should be moved * into arch/i386 instead? 
*/ -asmlinkage long sys_creat(const char __user * pathname, int mode) +SYSCALL_DEFINE2(creat, const char __user *, pathname, int, mode) { return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode); } diff --git a/fs/read_write.c b/fs/read_write.c index 7a8326bc590..0671aa016b6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -828,7 +828,7 @@ out: return retval; } -asmlinkage long sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) +SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) { loff_t pos; off_t off; @@ -847,7 +847,7 @@ asmlinkage long sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t return do_sendfile(out_fd, in_fd, NULL, count, 0); } -asmlinkage long sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) +SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) { loff_t pos; ssize_t ret; diff --git a/fs/stat.c b/fs/stat.c index f29c5fe4f8b..d712a0dfb50 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -320,8 +320,8 @@ asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, return error; } -asmlinkage long sys_readlink(const char __user *path, char __user *buf, - int bufsiz) +SYSCALL_DEFINE3(readlink, const char __user *, path, char __user *, buf, + int, bufsiz) { return sys_readlinkat(AT_FDCWD, path, buf, bufsiz); } -- cgit From ca013e945b1ba5828b151ee646946f1297b67a4c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:19 +0100 Subject: [CVE-2009-0029] System call wrappers part 17 Signed-off-by: Heiko Carstens --- fs/open.c | 16 +++++++--------- kernel/uid16.c | 6 +++--- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/fs/open.c b/fs/open.c index 293408b1c16..4a6d8006474 100644 --- a/fs/open.c +++ b/fs/open.c @@ -517,7 +517,7 @@ out: return res; } -asmlinkage long sys_access(const char __user *filename, int mode) +SYSCALL_DEFINE2(access, const char __user *, filename, int, 
mode) { return sys_faccessat(AT_FDCWD, filename, mode); } @@ -688,7 +688,7 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) return error; } -asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) +SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { struct path path; int error; @@ -732,7 +732,7 @@ out: return error; } -asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) +SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) { struct path path; int error; @@ -751,8 +751,7 @@ out: return error; } - -asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group) +SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) { struct file * file; int error = -EBADF; @@ -1048,7 +1047,7 @@ long do_sys_open(int dfd, const char __user *filename, int flags, int mode) return fd; } -asmlinkage long sys_open(const char __user *filename, int flags, int mode) +SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) { long ret; @@ -1117,7 +1116,7 @@ EXPORT_SYMBOL(filp_close); * releasing the fd. This ensures that one clone task can't release * an fd while another clone is opening it. */ -asmlinkage long sys_close(unsigned int fd) +SYSCALL_DEFINE1(close, unsigned int, fd) { struct file * filp; struct files_struct *files = current->files; @@ -1150,14 +1149,13 @@ out_unlock: spin_unlock(&files->file_lock); return -EBADF; } - EXPORT_SYMBOL(sys_close); /* * This routine simulates a hangup on the tty, to arrange that users * are given clean terminals at login time. 
*/ -asmlinkage long sys_vhangup(void) +SYSCALL_DEFINE0(vhangup) { if (capable(CAP_SYS_TTY_CONFIG)) { tty_vhangup_self(); diff --git a/kernel/uid16.c b/kernel/uid16.c index 2460c3199b5..37f48c049a2 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -17,7 +17,7 @@ #include -asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(chown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_chown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -25,7 +25,7 @@ asmlinkage long sys_chown16(const char __user * filename, old_uid_t user, old_gi return ret; } -asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(lchown16, const char __user *, filename, old_uid_t, user, old_gid_t, group) { long ret = sys_lchown(filename, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ @@ -33,7 +33,7 @@ asmlinkage long sys_lchown16(const char __user * filename, old_uid_t user, old_g return ret; } -asmlinkage long sys_fchown16(unsigned int fd, old_uid_t user, old_gid_t group) +SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) { long ret = sys_fchown(fd, low2highuid(user), low2highgid(group)); /* avoid REGPARM breakage on x86: */ -- cgit From a6b42e83f249aad723589b2bdf6d1dfb2b0997c8 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:20 +0100 Subject: [CVE-2009-0029] System call wrappers part 18 Signed-off-by: Heiko Carstens --- kernel/uid16.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/kernel/uid16.c b/kernel/uid16.c index 37f48c049a2..221894e6e98 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -41,7 +41,7 @@ SYSCALL_DEFINE3(fchown16, unsigned int, fd, old_uid_t, user, old_gid_t, group) return ret; } -asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) 
+SYSCALL_DEFINE2(setregid16, old_gid_t, rgid, old_gid_t, egid) { long ret = sys_setregid(low2highgid(rgid), low2highgid(egid)); /* avoid REGPARM breakage on x86: */ @@ -49,7 +49,7 @@ asmlinkage long sys_setregid16(old_gid_t rgid, old_gid_t egid) return ret; } -asmlinkage long sys_setgid16(old_gid_t gid) +SYSCALL_DEFINE1(setgid16, old_gid_t, gid) { long ret = sys_setgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ @@ -57,7 +57,7 @@ asmlinkage long sys_setgid16(old_gid_t gid) return ret; } -asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) +SYSCALL_DEFINE2(setreuid16, old_uid_t, ruid, old_uid_t, euid) { long ret = sys_setreuid(low2highuid(ruid), low2highuid(euid)); /* avoid REGPARM breakage on x86: */ @@ -65,7 +65,7 @@ asmlinkage long sys_setreuid16(old_uid_t ruid, old_uid_t euid) return ret; } -asmlinkage long sys_setuid16(old_uid_t uid) +SYSCALL_DEFINE1(setuid16, old_uid_t, uid) { long ret = sys_setuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -73,7 +73,7 @@ asmlinkage long sys_setuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) +SYSCALL_DEFINE3(setresuid16, old_uid_t, ruid, old_uid_t, euid, old_uid_t, suid) { long ret = sys_setresuid(low2highuid(ruid), low2highuid(euid), low2highuid(suid)); @@ -82,7 +82,7 @@ asmlinkage long sys_setresuid16(old_uid_t ruid, old_uid_t euid, old_uid_t suid) return ret; } -asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, old_uid_t __user *suid) +SYSCALL_DEFINE3(getresuid16, old_uid_t __user *, ruid, old_uid_t __user *, euid, old_uid_t __user *, suid) { const struct cred *cred = current_cred(); int retval; @@ -94,7 +94,7 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, return retval; } -asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) +SYSCALL_DEFINE3(setresgid16, old_gid_t, rgid, old_gid_t, egid, old_gid_t, sgid) { long ret = 
sys_setresgid(low2highgid(rgid), low2highgid(egid), low2highgid(sgid)); @@ -103,7 +103,8 @@ asmlinkage long sys_setresgid16(old_gid_t rgid, old_gid_t egid, old_gid_t sgid) return ret; } -asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, old_gid_t __user *sgid) + +SYSCALL_DEFINE3(getresgid16, old_gid_t __user *, rgid, old_gid_t __user *, egid, old_gid_t __user *, sgid) { const struct cred *cred = current_cred(); int retval; @@ -115,7 +116,7 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, return retval; } -asmlinkage long sys_setfsuid16(old_uid_t uid) +SYSCALL_DEFINE1(setfsuid16, old_uid_t, uid) { long ret = sys_setfsuid(low2highuid(uid)); /* avoid REGPARM breakage on x86: */ @@ -123,7 +124,7 @@ asmlinkage long sys_setfsuid16(old_uid_t uid) return ret; } -asmlinkage long sys_setfsgid16(old_gid_t gid) +SYSCALL_DEFINE1(setfsgid16, old_gid_t, gid) { long ret = sys_setfsgid(low2highgid(gid)); /* avoid REGPARM breakage on x86: */ -- cgit From 003d7ab479168132a2b2c6700fe682b08f08ab0c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:21 +0100 Subject: [CVE-2009-0029] System call wrappers part 19 Signed-off-by: Heiko Carstens --- fs/read_write.c | 8 ++++---- fs/utimes.c | 5 +++-- kernel/uid16.c | 12 ++++++------ 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/fs/read_write.c b/fs/read_write.c index 0671aa016b6..fad10af59d9 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -147,7 +147,7 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) } EXPORT_SYMBOL(vfs_llseek); -asmlinkage long sys_lseek(unsigned int fd, off_t offset, unsigned int origin) +SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) { off_t retval; struct file * file; @@ -171,9 +171,9 @@ bad: } #ifdef __ARCH_WANT_SYS_LLSEEK -asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, - unsigned long offset_low, loff_t __user * result, - unsigned int 
origin) +SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, + unsigned long, offset_low, loff_t __user *, result, + unsigned int, origin) { int retval; struct file * file; diff --git a/fs/utimes.c b/fs/utimes.c index 6929e3e91d0..ee853615798 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -24,7 +24,7 @@ * must be owner or have write permission. * Else, update from *times, must be owner or super user. */ -asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times) +SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times) { struct timespec tv[2]; @@ -214,7 +214,8 @@ asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __u return do_utimes(dfd, filename, utimes ? tstimes : NULL, 0); } -asmlinkage long sys_utimes(char __user *filename, struct timeval __user *utimes) +SYSCALL_DEFINE2(utimes, char __user *, filename, + struct timeval __user *, utimes) { return sys_futimesat(AT_FDCWD, filename, utimes); } diff --git a/kernel/uid16.c b/kernel/uid16.c index 221894e6e98..0314501688b 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -162,7 +162,7 @@ static int groups16_from_user(struct group_info *group_info, return 0; } -asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(getgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { const struct cred *cred = current_cred(); int i; @@ -185,7 +185,7 @@ out: return i; } -asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) +SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist) { struct group_info *group_info; int retval; @@ -210,22 +210,22 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) return retval; } -asmlinkage long sys_getuid16(void) +SYSCALL_DEFINE0(getuid16) { return high2lowuid(current_uid()); } -asmlinkage long sys_geteuid16(void) +SYSCALL_DEFINE0(geteuid16) { return high2lowuid(current_euid()); } -asmlinkage long 
sys_getgid16(void) +SYSCALL_DEFINE0(getgid16) { return high2lowgid(current_gid()); } -asmlinkage long sys_getegid16(void) +SYSCALL_DEFINE0(getegid16) { return high2lowgid(current_egid()); } -- cgit From 3cdad42884bbd95d5aa01297e8236ea1bad70053 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:22 +0100 Subject: [CVE-2009-0029] System call wrappers part 20 Signed-off-by: Heiko Carstens --- fs/dcache.c | 2 +- fs/namei.c | 4 ++-- fs/open.c | 4 ++-- fs/quota.c | 3 ++- fs/read_write.c | 13 +++++++------ 5 files changed, 14 insertions(+), 12 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 4547f66884a..937df0fb0da 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2092,7 +2092,7 @@ Elong: * return NULL; * } */ -asmlinkage long sys_getcwd(char __user *buf, unsigned long size) +SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size) { int error; struct path pwd, root; diff --git a/fs/namei.c b/fs/namei.c index 00c4f37a039..90520f05f99 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2081,7 +2081,7 @@ out_err: return error; } -asmlinkage long sys_mkdir(const char __user *pathname, int mode) +SYSCALL_DEFINE2(mkdir, const char __user *, pathname, int, mode) { return sys_mkdirat(AT_FDCWD, pathname, mode); } @@ -2195,7 +2195,7 @@ exit1: return error; } -asmlinkage long sys_rmdir(const char __user *pathname) +SYSCALL_DEFINE1(rmdir, const char __user *, pathname) { return do_rmdir(AT_FDCWD, pathname); } diff --git a/fs/open.c b/fs/open.c index 4a6d8006474..bc49e3c388d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -522,7 +522,7 @@ SYSCALL_DEFINE2(access, const char __user *, filename, int, mode) return sys_faccessat(AT_FDCWD, filename, mode); } -asmlinkage long sys_chdir(const char __user * filename) +SYSCALL_DEFINE1(chdir, const char __user *, filename) { struct path path; int error; @@ -543,7 +543,7 @@ out: return error; } -asmlinkage long sys_fchdir(unsigned int fd) +SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct file *file; struct inode 
*inode; diff --git a/fs/quota.c b/fs/quota.c index 4a8c94f05f7..d76ada914f9 100644 --- a/fs/quota.c +++ b/fs/quota.c @@ -371,7 +371,8 @@ static inline struct super_block *quotactl_block(const char __user *special) * calls. Maybe we need to add the process quotas etc. in the future, * but we probably should use rlimits for that. */ -asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr) +SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special, + qid_t, id, void __user *, addr) { uint cmds, type; struct super_block *sb = NULL; diff --git a/fs/read_write.c b/fs/read_write.c index fad10af59d9..400fe81c973 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -369,7 +369,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) file->f_pos = pos; } -asmlinkage long sys_read(unsigned int fd, char __user * buf, size_t count) +SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { struct file *file; ssize_t ret = -EBADF; @@ -386,7 +386,8 @@ asmlinkage long sys_read(unsigned int fd, char __user * buf, size_t count) return ret; } -asmlinkage long sys_write(unsigned int fd, const char __user * buf, size_t count) +SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, + size_t, count) { struct file *file; ssize_t ret = -EBADF; @@ -688,8 +689,8 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, EXPORT_SYMBOL(vfs_writev); -asmlinkage long -sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) +SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, + unsigned long, vlen) { struct file *file; ssize_t ret = -EBADF; @@ -709,8 +710,8 @@ sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) return ret; } -asmlinkage long -sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) +SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, + 
unsigned long, vlen) { struct file *file; ssize_t ret = -EBADF; -- cgit From 20f37034fb966a1c35894f9fe529fda0b6440101 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:23 +0100 Subject: [CVE-2009-0029] System call wrappers part 21 Signed-off-by: Heiko Carstens --- fs/readdir.c | 6 ++++-- net/socket.c | 30 +++++++++++++++--------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/fs/readdir.c b/fs/readdir.c index 8b4c2a0051a..cf6a0e39819 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -187,7 +187,8 @@ efault: return -EFAULT; } -asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * dirent, unsigned int count) +SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct linux_dirent __user *, dirent, unsigned int, count) { struct file * file; struct linux_dirent __user * lastdirent; @@ -268,7 +269,8 @@ efault: return -EFAULT; } -asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * dirent, unsigned int count) +SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct file * file; struct linux_dirent64 __user * lastdirent; diff --git a/net/socket.c b/net/socket.c index cc9b666e58f..fdd72c586a1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1356,7 +1356,7 @@ out_fd1: * the protocol layer (having also checked the address is ok). */ -asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) +SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1418,8 +1418,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. 
*/ -asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen, int, flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1502,8 +1502,8 @@ out_fd: goto out_put; } -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +SYSCALL_DEFINE3(accept, int, fd, struct sockaddr __user *, upeer_sockaddr, + int __user *, upeer_addrlen) { return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } @@ -1520,8 +1520,8 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, - int addrlen) +SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, + int, addrlen) { struct socket *sock; struct sockaddr_storage address; @@ -1552,8 +1552,8 @@ out: * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1583,8 +1583,8 @@ out: * name to user space. */ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, + int __user *, usockaddr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1725,8 +1725,8 @@ asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, * to pass the user mode parameter for the protocols to sort out. 
*/ -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, + char __user *, optval, int, optlen) { int err, fput_needed; struct socket *sock; @@ -1759,8 +1759,8 @@ out_put: * to pass a user mode parameter for the protocols to sort out. */ -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname, + char __user *, optval, int __user *, optlen) { int err, fput_needed; struct socket *sock; -- cgit From 3e0fa65f8ba4fd24b3dcfaf14d5b15eaab0fdc61 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:24 +0100 Subject: [CVE-2009-0029] System call wrappers part 22 Signed-off-by: Heiko Carstens --- net/socket.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/net/socket.c b/net/socket.c index fdd72c586a1..35dd7371752 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1214,7 +1214,7 @@ int sock_create_kern(int family, int type, int protocol, struct socket **res) return __sock_create(&init_net, family, type, protocol, res, 1); } -asmlinkage long sys_socket(int family, int type, int protocol) +SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { int retval; struct socket *sock; @@ -1255,8 +1255,8 @@ out_release: * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, - int __user *usockvec) +SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, + int __user *, usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1385,7 +1385,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) * ready for listening. 
*/ -asmlinkage long sys_listen(int fd, int backlog) +SYSCALL_DEFINE2(listen, int, fd, int, backlog) { struct socket *sock; int err, fput_needed; @@ -1615,9 +1615,9 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, - unsigned flags, struct sockaddr __user *addr, - int addr_len) +SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, + unsigned, flags, struct sockaddr __user *, addr, + int, addr_len) { struct socket *sock; struct sockaddr_storage address; @@ -1660,7 +1660,8 @@ out: * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, + unsigned, flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } @@ -1671,9 +1672,9 @@ asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) * sender address from kernel to user space. 
*/ -asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, - unsigned flags, struct sockaddr __user *addr, - int __user *addr_len) +SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, + unsigned, flags, struct sockaddr __user *, addr, + int __user *, addr_len) { struct socket *sock; struct iovec iov; @@ -1815,7 +1816,7 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) * BSD sendmsg interface */ -asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -1921,8 +1922,8 @@ out: * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, - unsigned int flags) +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; @@ -2045,7 +2046,7 @@ static const unsigned char nargs[19]={ * it is set by the callees. */ -asmlinkage long sys_socketcall(int call, unsigned long __user *args) +SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) { unsigned long a[6]; unsigned long a0, a1; -- cgit From 5a8a82b1d306a325d899b67715618413657efda4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:25 +0100 Subject: [CVE-2009-0029] System call wrappers part 23 Signed-off-by: Heiko Carstens --- fs/eventpoll.c | 18 +++++++++--------- fs/select.c | 8 ++++---- kernel/sys.c | 6 +++--- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 96355d50534..ba2f9ec7119 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1110,7 +1110,7 @@ retry: /* * Open an eventpoll file descriptor. 
*/ -asmlinkage long sys_epoll_create1(int flags) +SYSCALL_DEFINE1(epoll_create1, int, flags) { int error, fd = -1; struct eventpoll *ep; @@ -1150,7 +1150,7 @@ error_return: return fd; } -asmlinkage long sys_epoll_create(int size) +SYSCALL_DEFINE1(epoll_create, int, size) { if (size < 0) return -EINVAL; @@ -1163,8 +1163,8 @@ asmlinkage long sys_epoll_create(int size) * the eventpoll file that enables the insertion/removal/change of * file descriptors inside the interest set. */ -asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, - struct epoll_event __user *event) +SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, + struct epoll_event __user *, event) { int error; struct file *file, *tfile; @@ -1261,8 +1261,8 @@ error_return: * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_wait(2). */ -asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) +SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout) { int error; struct file *file; @@ -1319,9 +1319,9 @@ error_return: * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). 
*/ -asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout, const sigset_t __user *sigmask, - size_t sigsetsize) +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) { int error; sigset_t ksigmask, sigsaved; diff --git a/fs/select.c b/fs/select.c index d1651648be1..338f703403a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -557,8 +557,8 @@ out_nofds: return ret; } -asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timeval __user *tvp) +SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timeval __user *, tvp) { struct timespec end_time, *to = NULL; struct timeval tv; @@ -854,8 +854,8 @@ static long do_restart_poll(struct restart_block *restart_block) return ret; } -asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, - long timeout_msecs) +SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, + long, timeout_msecs) { struct timespec end_time, *to = NULL; int ret; diff --git a/kernel/sys.c b/kernel/sys.c index 39b192b4003..5292f2119da 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1406,7 +1406,7 @@ asmlinkage long sys_newuname(struct new_utsname __user * name) return errno; } -asmlinkage long sys_sethostname(char __user *name, int len) +SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; @@ -1430,7 +1430,7 @@ asmlinkage long sys_sethostname(char __user *name, int len) #ifdef __ARCH_WANT_SYS_GETHOSTNAME -asmlinkage long sys_gethostname(char __user *name, int len) +SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { int i, errno; struct new_utsname *u; @@ -1455,7 +1455,7 @@ asmlinkage long sys_gethostname(char __user *name, int len) * Only setdomainname; getdomainname can be implemented by calling * uname() 
*/ -asmlinkage long sys_setdomainname(char __user *name, int len) +SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; -- cgit From e48fbb699f82ef1e80bd7126046394d2dc9ca7e6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:26 +0100 Subject: [CVE-2009-0029] System call wrappers part 24 Signed-off-by: Heiko Carstens --- ipc/msg.c | 12 ++++++------ kernel/sys.c | 13 +++++++------ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/ipc/msg.c b/ipc/msg.c index b4eee1c6101..2ceab7f12fc 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -309,7 +309,7 @@ static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg) return security_msg_queue_associate(msq, msgflg); } -asmlinkage long sys_msgget(key_t key, int msgflg) +SYSCALL_DEFINE2(msgget, key_t, key, int, msgflg) { struct ipc_namespace *ns; struct ipc_ops msg_ops; @@ -466,7 +466,7 @@ out_up: return err; } -asmlinkage long sys_msgctl(int msqid, int cmd, struct msqid_ds __user *buf) +SYSCALL_DEFINE3(msgctl, int, msqid, int, cmd, struct msqid_ds __user *, buf) { struct msg_queue *msq; int err, version; @@ -723,8 +723,8 @@ out_free: return err; } -asmlinkage long -sys_msgsnd(int msqid, struct msgbuf __user *msgp, size_t msgsz, int msgflg) +SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, + int, msgflg) { long mtype; @@ -904,8 +904,8 @@ out_unlock: return msgsz; } -asmlinkage long sys_msgrcv(int msqid, struct msgbuf __user *msgp, size_t msgsz, - long msgtyp, int msgflg) +SYSCALL_DEFINE5(msgrcv, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz, + long, msgtyp, int, msgflg) { long err, mtype; diff --git a/kernel/sys.c b/kernel/sys.c index 5292f2119da..70ffa8408cd 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1395,7 +1395,7 @@ EXPORT_SYMBOL(in_egroup_p); DECLARE_RWSEM(uts_sem); -asmlinkage long sys_newuname(struct new_utsname __user * name) +SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { 
int errno = 0; @@ -1478,7 +1478,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) return errno; } -asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { if (resource >= RLIM_NLIMITS) return -EINVAL; @@ -1497,7 +1497,8 @@ asmlinkage long sys_getrlimit(unsigned int resource, struct rlimit __user *rlim) * Back compatibility for getrlimit. Needed for some apps. */ -asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, + struct rlimit __user *, rlim) { struct rlimit x; if (resource >= RLIM_NLIMITS) @@ -1515,7 +1516,7 @@ asmlinkage long sys_old_getrlimit(unsigned int resource, struct rlimit __user *r #endif -asmlinkage long sys_setrlimit(unsigned int resource, struct rlimit __user *rlim) +SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim, *old_rlim; int retval; @@ -1688,7 +1689,7 @@ int getrusage(struct task_struct *p, int who, struct rusage __user *ru) return copy_to_user(ru, &r, sizeof(r)) ? 
-EFAULT : 0; } -asmlinkage long sys_getrusage(int who, struct rusage __user *ru) +SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) { if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) @@ -1696,7 +1697,7 @@ asmlinkage long sys_getrusage(int who, struct rusage __user *ru) return getrusage(current, who, ru); } -asmlinkage long sys_umask(int mask) +SYSCALL_DEFINE1(umask, int, mask) { mask = xchg(&current->fs->umask, mask & S_IRWXUGO); return mask; -- cgit From d5460c9974a321a194aded4a8c4daaac68ea8171 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:27 +0100 Subject: [CVE-2009-0029] System call wrappers part 25 Signed-off-by: Heiko Carstens --- ipc/mqueue.c | 6 +++--- ipc/sem.c | 9 +++++---- ipc/shm.c | 8 ++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 6df028b7054..faac04c85e7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -650,8 +650,8 @@ static struct file *do_open(struct dentry *dentry, int oflag) return dentry_open(dentry, mqueue_mnt, oflag, cred); } -asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode, - struct mq_attr __user *u_attr) +SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, mode_t, mode, + struct mq_attr __user *, u_attr) { struct dentry *dentry; struct file *filp; @@ -721,7 +721,7 @@ out_putname: return fd; } -asmlinkage long sys_mq_unlink(const char __user *u_name) +SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) { int err; char *name; diff --git a/ipc/sem.c b/ipc/sem.c index c385c40c061..16a2189e96f 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -308,7 +308,7 @@ static inline int sem_more_checks(struct kern_ipc_perm *ipcp, return 0; } -asmlinkage long sys_semget(key_t key, int nsems, int semflg) +SYSCALL_DEFINE3(semget, key_t, key, int, nsems, int, semflg) { struct ipc_namespace *ns; struct ipc_ops sem_ops; @@ -1055,8 +1055,8 @@ out: return un; } -asmlinkage long sys_semtimedop(int semid, 
struct sembuf __user *tsops, - unsigned nsops, const struct timespec __user *timeout) +SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops, + unsigned, nsops, const struct timespec __user *, timeout) { int error = -EINVAL; struct sem_array *sma; @@ -1232,7 +1232,8 @@ out_free: return error; } -asmlinkage long sys_semop (int semid, struct sembuf __user *tsops, unsigned nsops) +SYSCALL_DEFINE3(semop, int, semid, struct sembuf __user *, tsops, + unsigned, nsops) { return sys_semtimedop(semid, tsops, nsops, NULL); } diff --git a/ipc/shm.c b/ipc/shm.c index d0ab5527bf4..a9e09ad2263 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -440,7 +440,7 @@ static inline int shm_more_checks(struct kern_ipc_perm *ipcp, return 0; } -asmlinkage long sys_shmget (key_t key, size_t size, int shmflg) +SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg) { struct ipc_namespace *ns; struct ipc_ops shm_ops; @@ -621,7 +621,7 @@ out_up: return err; } -asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) +SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) { struct shmid_kernel *shp; int err, version; @@ -939,7 +939,7 @@ out_put_dentry: goto out_nattch; } -asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) +SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg) { unsigned long ret; long err; @@ -955,7 +955,7 @@ asmlinkage long sys_shmat(int shmid, char __user *shmaddr, int shmflg) * detach and kill segment if marked destroyed. * The work is done in shm_close. 
*/ -asmlinkage long sys_shmdt(char __user *shmaddr) +SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *next; -- cgit From c4ea37c26a691ad0b7e86aa5884aab27830e95c9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:28 +0100 Subject: [CVE-2009-0029] System call wrappers part 26 Signed-off-by: Heiko Carstens --- drivers/pci/syscall.c | 12 ++++-------- ipc/mqueue.c | 22 +++++++++++----------- kernel/sys.c | 4 ++-- mm/swapfile.c | 4 ++-- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index 645d7a60e41..ec22284eed3 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -14,10 +14,8 @@ #include #include "pci.h" -asmlinkage long -sys_pciconfig_read(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf) +SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, + unsigned long, off, unsigned long, len, void __user *, buf) { struct pci_dev *dev; u8 byte; @@ -86,10 +84,8 @@ error: return err; } -asmlinkage long -sys_pciconfig_write(unsigned long bus, unsigned long dfn, - unsigned long off, unsigned long len, - void __user *buf) +SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, + unsigned long, off, unsigned long, len, void __user *, buf) { struct pci_dev *dev; u8 byte; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index faac04c85e7..54b4077fed7 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -814,9 +814,9 @@ static inline void pipelined_receive(struct mqueue_inode_info *info) sender->state = STATE_READY; } -asmlinkage long sys_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr, - size_t msg_len, unsigned int msg_prio, - const struct timespec __user *u_abs_timeout) +SYSCALL_DEFINE5(mq_timedsend, mqd_t, mqdes, const char __user *, u_msg_ptr, + size_t, msg_len, unsigned int, msg_prio, + const struct timespec __user *, u_abs_timeout) { struct 
file *filp; struct inode *inode; @@ -907,9 +907,9 @@ out: return ret; } -asmlinkage long sys_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr, - size_t msg_len, unsigned int __user *u_msg_prio, - const struct timespec __user *u_abs_timeout) +SYSCALL_DEFINE5(mq_timedreceive, mqd_t, mqdes, char __user *, u_msg_ptr, + size_t, msg_len, unsigned int __user *, u_msg_prio, + const struct timespec __user *, u_abs_timeout) { long timeout; ssize_t ret; @@ -997,8 +997,8 @@ out: * and he isn't currently owner of notification, will be silently discarded. * It isn't explicitly defined in the POSIX. */ -asmlinkage long sys_mq_notify(mqd_t mqdes, - const struct sigevent __user *u_notification) +SYSCALL_DEFINE2(mq_notify, mqd_t, mqdes, + const struct sigevent __user *, u_notification) { int ret; struct file *filp; @@ -1123,9 +1123,9 @@ out: return ret; } -asmlinkage long sys_mq_getsetattr(mqd_t mqdes, - const struct mq_attr __user *u_mqstat, - struct mq_attr __user *u_omqstat) +SYSCALL_DEFINE3(mq_getsetattr, mqd_t, mqdes, + const struct mq_attr __user *, u_mqstat, + struct mq_attr __user *, u_omqstat) { int ret; struct mq_attr mqstat, omqstat; diff --git a/kernel/sys.c b/kernel/sys.c index 70ffa8408cd..59aadcdad6c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1703,8 +1703,8 @@ SYSCALL_DEFINE1(umask, int, mask) return mask; } -asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) +SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, + unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; unsigned char comm[sizeof(me->comm)]; diff --git a/mm/swapfile.c b/mm/swapfile.c index da422c47e2e..f48b831e5e5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1377,7 +1377,7 @@ out: return ret; } -asmlinkage long sys_swapoff(const char __user * specialfile) +SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) { struct swap_info_struct * p = NULL; unsigned short 
*swap_map; @@ -1633,7 +1633,7 @@ late_initcall(max_swapfiles_check); * * The swapon system call */ -asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) +SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) { struct swap_info_struct * p; char *name = NULL; -- cgit From 1e7bfb2134dfec37ce04fb3a4ca89299e892d10c Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:29 +0100 Subject: [CVE-2009-0029] System call wrappers part 27 Signed-off-by: Heiko Carstens --- fs/exec.c | 2 +- fs/filesystems.c | 2 +- fs/nfsctl.c | 4 ++-- kernel/printk.c | 2 +- kernel/ptrace.c | 2 +- kernel/sysctl.c | 4 ++-- kernel/timer.c | 2 +- security/keys/keyctl.c | 18 +++++++++--------- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 71a6efe5d8b..0dd60a01f1b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -99,7 +99,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) * * Also note that we take the address to load from from the file itself. */ -asmlinkage long sys_uselib(const char __user * library) +SYSCALL_DEFINE1(uselib, const char __user *, library) { struct file *file; struct nameidata nd; diff --git a/fs/filesystems.c b/fs/filesystems.c index d488dcd7f2b..1aa70260e6d 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -179,7 +179,7 @@ static int fs_maxindex(void) /* * Whee.. Weird sysv syscall. 
*/ -asmlinkage long sys_sysfs(int option, unsigned long arg1, unsigned long arg2) +SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) { int retval = -EINVAL; diff --git a/fs/nfsctl.c b/fs/nfsctl.c index b27451909df..8f9a20556f7 100644 --- a/fs/nfsctl.c +++ b/fs/nfsctl.c @@ -86,8 +86,8 @@ static struct { }, }; -long -asmlinkage sys_nfsservctl(int cmd, struct nfsctl_arg __user *arg, void __user *res) +SYSCALL_DEFINE3(nfsservctl, int, cmd, struct nfsctl_arg __user *, arg, + void __user *, res) { struct file *file; void __user *p = &arg->u; diff --git a/kernel/printk.c b/kernel/printk.c index e48cf33783f..69188f226a9 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -382,7 +382,7 @@ out: return error; } -asmlinkage long sys_syslog(int type, char __user *buf, int len) +SYSCALL_DEFINE3(syslog, int, type, char __user *, buf, int, len) { return do_syslog(type, buf, len); } diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 29dc700e198..c9cf48b21f0 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -574,7 +574,7 @@ struct task_struct *ptrace_get_task_struct(pid_t pid) #define arch_ptrace_attach(child) do { } while (0) #endif -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +SYSCALL_DEFINE4(ptrace, long, request, long, pid, long, addr, long, data) { struct task_struct *child; long ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 89d74436318..3e38b74b612 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1688,7 +1688,7 @@ int do_sysctl(int __user *name, int nlen, void __user *oldval, size_t __user *ol return error; } -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { struct __sysctl_args tmp; int error; @@ -2989,7 +2989,7 @@ int sysctl_ms_jiffies(struct ctl_table *table, #else /* CONFIG_SYSCTL_SYSCALL */ -asmlinkage long sys_sysctl(struct __sysctl_args __user *args) +SYSCALL_DEFINE1(sysctl, struct __sysctl_args __user *, args) { 
struct __sysctl_args tmp; int error; diff --git a/kernel/timer.c b/kernel/timer.c index 14a51530a4c..13dd64fe143 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1400,7 +1400,7 @@ out: return 0; } -asmlinkage long sys_sysinfo(struct sysinfo __user *info) +SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) { struct sysinfo val; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 09796797d12..070a53eab80 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -54,11 +54,11 @@ static int key_get_type_from_user(char *type, * - returns the new key's serial number * - implements add_key() */ -asmlinkage long sys_add_key(const char __user *_type, - const char __user *_description, - const void __user *_payload, - size_t plen, - key_serial_t ringid) +SYSCALL_DEFINE5(add_key, const char __user *, _type, + const char __user *, _description, + const void __user *, _payload, + size_t, plen, + key_serial_t, ringid) { key_ref_t keyring_ref, key_ref; char type[32], *description; @@ -146,10 +146,10 @@ asmlinkage long sys_add_key(const char __user *_type, * - if the _callout_info string is empty, it will be rendered as "-" * - implements request_key() */ -asmlinkage long sys_request_key(const char __user *_type, - const char __user *_description, - const char __user *_callout_info, - key_serial_t destringid) +SYSCALL_DEFINE4(request_key, const char __user *, _type, + const char __user *, _description, + const char __user *, _callout_info, + key_serial_t, destringid) { struct key_type *ktype; struct key *key; -- cgit From 938bb9f5e840eddbf54e4f62f6c5ba9b3ae12c9d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:30 +0100 Subject: [CVE-2009-0029] System call wrappers part 28 Signed-off-by: Heiko Carstens --- fs/ioprio.c | 5 ++--- fs/notify/inotify/inotify_user.c | 4 ++-- mm/mempolicy.c | 24 +++++++++++------------- mm/migrate.c | 8 ++++---- security/keys/keyctl.c | 4 ++-- 5 files changed, 21 insertions(+), 24 deletions(-) 
diff --git a/fs/ioprio.c b/fs/ioprio.c index 1a39ac37094..c7c0b28d7d2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -72,7 +72,7 @@ int set_task_ioprio(struct task_struct *task, int ioprio) } EXPORT_SYMBOL_GPL(set_task_ioprio); -asmlinkage long sys_ioprio_set(int which, int who, int ioprio) +SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio) { int class = IOPRIO_PRIO_CLASS(ioprio); int data = IOPRIO_PRIO_DATA(ioprio); @@ -188,7 +188,7 @@ int ioprio_best(unsigned short aprio, unsigned short bprio) return aprio; } -asmlinkage long sys_ioprio_get(int which, int who) +SYSCALL_DEFINE2(ioprio_get, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; @@ -252,4 +252,3 @@ asmlinkage long sys_ioprio_get(int which, int who) read_unlock(&tasklist_lock); return ret; } - diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 81b8644b013..efef1ffca77 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -576,7 +576,7 @@ static const struct inotify_operations inotify_user_ops = { .destroy_watch = free_inotify_user_watch, }; -asmlinkage long sys_inotify_init1(int flags) +SYSCALL_DEFINE1(inotify_init1, int, flags) { struct inotify_device *dev; struct inotify_handle *ih; @@ -655,7 +655,7 @@ out_put_fd: return ret; } -asmlinkage long sys_inotify_init(void) +SYSCALL_DEFINE0(inotify_init) { return sys_inotify_init1(0); } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e412ffa8e52..3eb4a6fdc04 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1068,10 +1068,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, return copy_to_user(mask, nodes_addr(*nodes), copy) ? 
-EFAULT : 0; } -asmlinkage long sys_mbind(unsigned long start, unsigned long len, - unsigned long mode, - unsigned long __user *nmask, unsigned long maxnode, - unsigned flags) +SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, + unsigned long, mode, unsigned long __user *, nmask, + unsigned long, maxnode, unsigned, flags) { nodemask_t nodes; int err; @@ -1091,8 +1090,8 @@ asmlinkage long sys_mbind(unsigned long start, unsigned long len, } /* Set the process memory policy */ -asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, - unsigned long maxnode) +SYSCALL_DEFINE3(set_mempolicy, int, mode, unsigned long __user *, nmask, + unsigned long, maxnode) { int err; nodemask_t nodes; @@ -1110,9 +1109,9 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, return do_set_mempolicy(mode, flags, &nodes); } -asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, - const unsigned long __user *old_nodes, - const unsigned long __user *new_nodes) +SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, + const unsigned long __user *, old_nodes, + const unsigned long __user *, new_nodes) { const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; @@ -1185,10 +1184,9 @@ out: /* Retrieve NUMA policy */ -asmlinkage long sys_get_mempolicy(int __user *policy, - unsigned long __user *nmask, - unsigned long maxnode, - unsigned long addr, unsigned long flags) +SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, + unsigned long __user *, nmask, unsigned long, maxnode, + unsigned long, addr, unsigned long, flags) { int err; int uninitialized_var(pval); diff --git a/mm/migrate.c b/mm/migrate.c index a30ea5fcf9f..2bb4e1d6352 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1055,10 +1055,10 @@ out: * Move a list of pages in the address space of the currently executing * process. 
*/ -asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, - const void __user * __user *pages, - const int __user *nodes, - int __user *status, int flags) +SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, + const void __user * __user *, pages, + const int __user *, nodes, + int __user *, status, int, flags) { const struct cred *cred = current_cred(), *tcred; struct task_struct *task; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 070a53eab80..e9335e1c6cf 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1216,8 +1216,8 @@ long keyctl_get_security(key_serial_t keyid, /* * the key control system call */ -asmlinkage long sys_keyctl(int option, unsigned long arg2, unsigned long arg3, - unsigned long arg4, unsigned long arg5) +SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, + unsigned long, arg4, unsigned long, arg5) { switch (option) { case KEYCTL_GET_KEYRING_ID: -- cgit From 2e4d0924eb0c403ce4014fa139d1d61bf2c44fee Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:31 +0100 Subject: [CVE-2009-0029] System call wrappers part 29 Signed-off-by: Heiko Carstens --- fs/namei.c | 21 ++++++++++----------- fs/notify/inotify/inotify_user.c | 5 +++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 90520f05f99..bbc15c23755 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1962,8 +1962,8 @@ static int may_mknod(mode_t mode) } } -asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, - unsigned dev) +SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, + unsigned, dev) { int error; char *tmp; @@ -2044,7 +2044,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return error; } -asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) +SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) { int error = 0; char * tmp; @@ 
-2291,7 +2291,7 @@ slashes: goto exit2; } -asmlinkage long sys_unlinkat(int dfd, const char __user *pathname, int flag) +SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) { if ((flag & ~AT_REMOVEDIR) != 0) return -EINVAL; @@ -2328,8 +2328,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) return error; } -asmlinkage long sys_symlinkat(const char __user *oldname, - int newdfd, const char __user *newname) +SYSCALL_DEFINE3(symlinkat, const char __user *, oldname, + int, newdfd, const char __user *, newname) { int error; char *from; @@ -2422,9 +2422,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de * with linux 2.0, and to avoid hard-linking to directories * and other special files. --ADM */ -asmlinkage long sys_linkat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname, - int flags) +SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname, int, flags) { struct dentry *new_dentry; struct nameidata nd; @@ -2624,8 +2623,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } -asmlinkage long sys_renameat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname) +SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, + int, newdfd, const char __user *, newname) { struct dentry *old_dir, *new_dir; struct dentry *old_dentry, *new_dentry; diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index efef1ffca77..d53a1838d6e 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -660,7 +660,8 @@ SYSCALL_DEFINE0(inotify_init) return sys_inotify_init1(0); } -asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask) +SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, + u32, mask) { struct inode *inode; struct inotify_device 
*dev; @@ -704,7 +705,7 @@ fput_and_out: return ret; } -asmlinkage long sys_inotify_rm_watch(int fd, __s32 wd) +SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct file *filp; struct inotify_device *dev; -- cgit From 6559eed8ca7db0531a207cd80be5e28cd6f213c5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:32 +0100 Subject: [CVE-2009-0029] System call wrappers part 30 Signed-off-by: Heiko Carstens --- fs/open.c | 13 ++++++------- fs/stat.c | 12 ++++++------ fs/utimes.c | 6 ++++-- kernel/fork.c | 2 +- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/fs/open.c b/fs/open.c index bc49e3c388d..a3a78ceb2a2 100644 --- a/fs/open.c +++ b/fs/open.c @@ -447,7 +447,7 @@ SYSCALL_ALIAS(sys_fallocate, SyS_fallocate); * We do this by temporarily clearing all FS-related capabilities and * switching the fsuid/fsgid around to the real ones. */ -asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) +SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode) { const struct cred *old_cred; struct cred *override_cred; @@ -628,8 +628,7 @@ out: return err; } -asmlinkage long sys_fchmodat(int dfd, const char __user *filename, - mode_t mode) +SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode) { struct path path; struct inode *inode; @@ -707,8 +706,8 @@ out: return error; } -asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, - gid_t group, int flag) +SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, + gid_t, group, int, flag) { struct path path; int error = -EINVAL; @@ -1060,8 +1059,8 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode) return ret; } -asmlinkage long sys_openat(int dfd, const char __user *filename, int flags, - int mode) +SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags, + int, mode) { long ret; diff --git a/fs/stat.c b/fs/stat.c index 
d712a0dfb50..2db740a0cfb 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -260,8 +260,8 @@ SYSCALL_DEFINE2(newlstat, char __user *, filename, struct stat __user *, statbuf } #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT) -asmlinkage long sys_newfstatat(int dfd, char __user *filename, - struct stat __user *statbuf, int flag) +SYSCALL_DEFINE4(newfstatat, int, dfd, char __user *, filename, + struct stat __user *, statbuf, int, flag) { struct kstat stat; int error = -EINVAL; @@ -293,8 +293,8 @@ SYSCALL_DEFINE2(newfstat, unsigned int, fd, struct stat __user *, statbuf) return error; } -asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, - char __user *buf, int bufsiz) +SYSCALL_DEFINE4(readlinkat, int, dfd, const char __user *, pathname, + char __user *, buf, int, bufsiz) { struct path path; int error; @@ -400,8 +400,8 @@ SYSCALL_DEFINE2(fstat64, unsigned long, fd, struct stat64 __user *, statbuf) return error; } -asmlinkage long sys_fstatat64(int dfd, char __user *filename, - struct stat64 __user *statbuf, int flag) +SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename, + struct stat64 __user *, statbuf, int, flag) { struct kstat stat; int error = -EINVAL; diff --git a/fs/utimes.c b/fs/utimes.c index ee853615798..e4c75db5d37 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -170,7 +170,8 @@ out: return error; } -asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __user *utimes, int flags) +SYSCALL_DEFINE4(utimensat, int, dfd, char __user *, filename, + struct timespec __user *, utimes, int, flags) { struct timespec tstimes[2]; @@ -187,7 +188,8 @@ asmlinkage long sys_utimensat(int dfd, char __user *filename, struct timespec __ return do_utimes(dfd, filename, utimes ? 
tstimes : NULL, flags); } -asmlinkage long sys_futimesat(int dfd, char __user *filename, struct timeval __user *utimes) +SYSCALL_DEFINE3(futimesat, int, dfd, char __user *, filename, + struct timeval __user *, utimes) { struct timeval times[2]; struct timespec tstimes[2]; diff --git a/kernel/fork.c b/kernel/fork.c index 8eb37d38c6a..bf0cef8bbdf 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1603,7 +1603,7 @@ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp * constructed. Here we are modifying the current, active, * task_struct. */ -asmlinkage long sys_unshare(unsigned long unshare_flags) +SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) { int err = 0; struct fs_struct *fs, *new_fs = NULL; -- cgit From 836f92adf121f806e9beb5b6b88bd5c9c4ea3f24 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:33 +0100 Subject: [CVE-2009-0029] System call wrappers part 31 Signed-off-by: Heiko Carstens --- fs/signalfd.c | 8 ++++---- fs/splice.c | 12 ++++++------ fs/timerfd.c | 8 ++++---- kernel/futex.c | 11 +++++------ kernel/sys.c | 4 ++-- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/fs/signalfd.c b/fs/signalfd.c index 9c39bc7f843..b07565c9438 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -205,8 +205,8 @@ static const struct file_operations signalfd_fops = { .read = signalfd_read, }; -asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, - size_t sizemask, int flags) +SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, + size_t, sizemask, int, flags) { sigset_t sigmask; struct signalfd_ctx *ctx; @@ -259,8 +259,8 @@ asmlinkage long sys_signalfd4(int ufd, sigset_t __user *user_mask, return ufd; } -asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, - size_t sizemask) +SYSCALL_DEFINE3(signalfd, int, ufd, sigset_t __user *, user_mask, + size_t, sizemask) { return sys_signalfd4(ufd, user_mask, sizemask, 0); } diff --git a/fs/splice.c b/fs/splice.c index 
a54b3e3f10a..4ed0ba44a96 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1435,8 +1435,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, * Currently we punt and implement it as a normal copy, see pipe_to_user(). * */ -asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, - unsigned long nr_segs, unsigned int flags) +SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov, + unsigned long, nr_segs, unsigned int, flags) { struct file *file; long error; @@ -1461,9 +1461,9 @@ asmlinkage long sys_vmsplice(int fd, const struct iovec __user *iov, return error; } -asmlinkage long sys_splice(int fd_in, loff_t __user *off_in, - int fd_out, loff_t __user *off_out, - size_t len, unsigned int flags) +SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, + int, fd_out, loff_t __user *, off_out, + size_t, len, unsigned int, flags) { long error; struct file *in, *out; @@ -1685,7 +1685,7 @@ static long do_tee(struct file *in, struct file *out, size_t len, return ret; } -asmlinkage long sys_tee(int fdin, int fdout, size_t len, unsigned int flags) +SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { struct file *in; int error, fput_in; diff --git a/fs/timerfd.c b/fs/timerfd.c index 0862f0e49d0..c8c14f58b96 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -177,7 +177,7 @@ static struct file *timerfd_fget(int fd) return file; } -asmlinkage long sys_timerfd_create(int clockid, int flags) +SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) { int ufd; struct timerfd_ctx *ctx; @@ -208,9 +208,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags) return ufd; } -asmlinkage long sys_timerfd_settime(int ufd, int flags, - const struct itimerspec __user *utmr, - struct itimerspec __user *otmr) +SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, + const struct itimerspec __user *, utmr, + struct itimerspec __user *, otmr) { struct file *file; struct timerfd_ctx *ctx; diff --git 
a/kernel/futex.c b/kernel/futex.c index e86931d8d4e..f89d373a9c6 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1733,9 +1733,8 @@ pi_faulted: * @head: pointer to the list-head * @len: length of the list-head, as userspace expects */ -asmlinkage long -sys_set_robust_list(struct robust_list_head __user *head, - size_t len) +SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head, + size_t, len) { if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1756,9 +1755,9 @@ sys_set_robust_list(struct robust_list_head __user *head, * @head_ptr: pointer to a list-head pointer, the kernel fills it in * @len_ptr: pointer to a length field, the kernel fills in the header size */ -asmlinkage long -sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, - size_t __user *len_ptr) +SYSCALL_DEFINE3(get_robust_list, int, pid, + struct robust_list_head __user * __user *, head_ptr, + size_t __user *, len_ptr) { struct robust_list_head __user *head; unsigned long ret; diff --git a/kernel/sys.c b/kernel/sys.c index 59aadcdad6c..e7dc0e10a48 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1817,8 +1817,8 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return error; } -asmlinkage long sys_getcpu(unsigned __user *cpup, unsigned __user *nodep, - struct getcpu_cache __user *unused) +SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, + struct getcpu_cache __user *, unused) { int err = 0; int cpu = raw_smp_processor_id(); -- cgit From d4e82042c4cfa87a7d51710b71f568fe80132551 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:34 +0100 Subject: [CVE-2009-0029] System call wrappers part 32 Signed-off-by: Heiko Carstens --- fs/eventfd.c | 5 ++--- fs/pipe.c | 2 +- fs/readdir.c | 3 ++- fs/select.c | 11 ++++++----- fs/timerfd.c | 2 +- include/linux/syscalls.h | 7 +++++++ kernel/signal.c | 11 +++++------ 7 files changed, 24 insertions(+), 17 deletions(-) diff --git a/fs/eventfd.c 
b/fs/eventfd.c index 08bf558d040..5de2c2db3aa 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -198,7 +198,7 @@ struct file *eventfd_fget(int fd) return file; } -asmlinkage long sys_eventfd2(unsigned int count, int flags) +SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags) { int fd; struct eventfd_ctx *ctx; @@ -228,8 +228,7 @@ asmlinkage long sys_eventfd2(unsigned int count, int flags) return fd; } -asmlinkage long sys_eventfd(unsigned int count) +SYSCALL_DEFINE1(eventfd, unsigned int, count) { return sys_eventfd2(count, 0); } - diff --git a/fs/pipe.c b/fs/pipe.c index 0c64db86c91..b89c878588a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1043,7 +1043,7 @@ int do_pipe(int *fd) * sys_pipe() is the normal C calling standard for creating * a pipe. It's not the way Unix traditionally does this, though. */ -asmlinkage long sys_pipe2(int __user *fildes, int flags) +SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) { int fd[2]; int error; diff --git a/fs/readdir.c b/fs/readdir.c index cf6a0e39819..7723401f8d8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -102,7 +102,8 @@ efault: return -EFAULT; } -asmlinkage long sys_old_readdir(unsigned int fd, struct old_linux_dirent __user * dirent, unsigned int count) +SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; struct file * file; diff --git a/fs/select.c b/fs/select.c index 338f703403a..0fe0e1469df 100644 --- a/fs/select.c +++ b/fs/select.c @@ -636,8 +636,9 @@ static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp, * which has a pointer to the sigset_t itself followed by a size_t containing * the sigset size. 
*/ -asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, struct timespec __user *tsp, void __user *sig) +SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp, + fd_set __user *, exp, struct timespec __user *, tsp, + void __user *, sig) { size_t sigsetsize = 0; sigset_t __user *up = NULL; @@ -889,9 +890,9 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, } #ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, - struct timespec __user *tsp, const sigset_t __user *sigmask, - size_t sigsetsize) +SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds, + struct timespec __user *, tsp, const sigset_t __user *, sigmask, + size_t, sigsetsize) { sigset_t ksigmask, sigsaved; struct timespec ts, end_time, *to = NULL; diff --git a/fs/timerfd.c b/fs/timerfd.c index c8c14f58b96..6a123b8ff3f 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -265,7 +265,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return 0; } -asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr) +SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) { struct file *file; struct timerfd_ctx *ctx; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 90aa5eba87a..56c400138b0 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -678,6 +678,13 @@ asmlinkage long sys_eventfd(unsigned int count); asmlinkage long sys_eventfd2(unsigned int count, int flags); asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len); asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int); +asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *, + fd_set __user *, struct timespec __user *, + void __user *); +asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, + struct timespec __user *, const sigset_t __user 
*, + size_t); +asmlinkage long sys_pipe2(int __user *, int); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); diff --git a/kernel/signal.c b/kernel/signal.c index e2333929611..e73759783dc 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2491,11 +2491,10 @@ out: #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ #ifdef __ARCH_WANT_SYS_RT_SIGACTION -asmlinkage long -sys_rt_sigaction(int sig, - const struct sigaction __user *act, - struct sigaction __user *oact, - size_t sigsetsize) +SYSCALL_DEFINE4(rt_sigaction, int, sig, + const struct sigaction __user *, act, + struct sigaction __user *, oact, + size_t, sigsetsize) { struct k_sigaction new_sa, old_sa; int ret = -EINVAL; @@ -2578,7 +2577,7 @@ SYSCALL_DEFINE0(pause) #endif #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND -asmlinkage long sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) +SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) { sigset_t newset; -- cgit From 2b66421995d2e93c9d1a0111acf2581f8529c6e5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:35 +0100 Subject: [CVE-2009-0029] System call wrappers part 33 Signed-off-by: Heiko Carstens --- fs/pipe.c | 2 +- include/linux/syscalls.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/pipe.c b/fs/pipe.c index b89c878588a..3a48ba5179d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1059,7 +1059,7 @@ SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags) return error; } -asmlinkage long sys_pipe(int __user *fildes) +SYSCALL_DEFINE1(pipe, int __user *, fildes) { return sys_pipe2(fildes, 0); } diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 56c400138b0..16875f89e6a 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -685,6 +685,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *, unsigned int, struct timespec __user *, const sigset_t __user *, size_t); asmlinkage long sys_pipe2(int __user *, int); +asmlinkage long 
sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); -- cgit From 26689452f5ca201add63b1b1ff0dbcf82d6885e7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 14 Jan 2009 14:14:36 +0100 Subject: [CVE-2009-0029] s390 specific system call wrappers Signed-off-by: Heiko Carstens --- arch/s390/kernel/compat_wrapper.S | 4 ++-- arch/s390/kernel/entry.h | 15 ++++++++------- arch/s390/kernel/process.c | 9 +++++---- arch/s390/kernel/signal.c | 19 ++++++++----------- arch/s390/kernel/sys_s390.c | 35 +++++++++++++++++++++-------------- arch/s390/kernel/syscalls.S | 12 ++++++------ 6 files changed, 50 insertions(+), 44 deletions(-) diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S index fc2c97197a5..6035cd20c7a 100644 --- a/arch/s390/kernel/compat_wrapper.S +++ b/arch/s390/kernel/compat_wrapper.S @@ -547,7 +547,7 @@ sys32_setdomainname_wrapper: .globl sys32_newuname_wrapper sys32_newuname_wrapper: llgtr %r2,%r2 # struct new_utsname * - jg s390x_newuname # branch to system call + jg sys_s390_newuname # branch to system call .globl compat_sys_adjtimex_wrapper compat_sys_adjtimex_wrapper: @@ -615,7 +615,7 @@ sys32_sysfs_wrapper: .globl sys32_personality_wrapper sys32_personality_wrapper: llgfr %r2,%r2 # unsigned long - jg s390x_personality # branch to system call + jg sys_s390_personality # branch to system call .globl sys32_setfsuid16_wrapper sys32_setfsuid16_wrapper: diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 5c9b5b37004..950c59c6688 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -31,21 +31,22 @@ struct old_sigaction; struct sel_arg_struct; long sys_mmap2(struct mmap_arg_struct __user *arg); -long old_mmap(struct mmap_arg_struct __user *arg); +long sys_s390_old_mmap(struct mmap_arg_struct __user *arg); long sys_ipc(uint call, int first, unsigned long second, unsigned long third, void __user *ptr); -long s390x_newuname(struct new_utsname 
__user *name); -long s390x_personality(unsigned long personality); -long s390_fadvise64(int fd, u32 offset_high, u32 offset_low, +long sys_s390_newuname(struct new_utsname __user *name); +long sys_s390_personality(unsigned long personality); +long sys_s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice); -long s390_fadvise64_64(struct fadvise64_64_args __user *args); -long s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, u32 len_low); +long sys_s390_fadvise64_64(struct fadvise64_64_args __user *args); +long sys_s390_fallocate(int fd, int mode, loff_t offset, u32 len_high, + u32 len_low); long sys_fork(void); long sys_clone(void); long sys_vfork(void); void execve_tail(void); long sys_execve(void); -int sys_sigsuspend(int history0, int history1, old_sigset_t mask); +long sys_sigsuspend(int history0, int history1, old_sigset_t mask); long sys_sigaction(int sig, const struct old_sigaction __user *act, struct old_sigaction __user *oact); long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss); diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b6110bdf8dc..5cd38a90e64 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -225,13 +226,13 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, return 0; } -asmlinkage long sys_fork(void) +SYSCALL_DEFINE0(fork) { struct pt_regs *regs = task_pt_regs(current); return do_fork(SIGCHLD, regs->gprs[15], regs, 0, NULL, NULL); } -asmlinkage long sys_clone(void) +SYSCALL_DEFINE0(clone) { struct pt_regs *regs = task_pt_regs(current); unsigned long clone_flags; @@ -258,7 +259,7 @@ asmlinkage long sys_clone(void) * do not have enough call-clobbered registers to hold all * the information you need. 
*/ -asmlinkage long sys_vfork(void) +SYSCALL_DEFINE0(vfork) { struct pt_regs *regs = task_pt_regs(current); return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, @@ -278,7 +279,7 @@ asmlinkage void execve_tail(void) /* * sys_execve() executes a new program. */ -asmlinkage long sys_execve(void) +SYSCALL_DEFINE0(execve) { struct pt_regs *regs = task_pt_regs(current); char *filename; diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 8e6812a2267..3cf74c3ccb6 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -53,8 +54,7 @@ typedef struct /* * Atomically swap in the new signal mask, and wait for a signal. */ -asmlinkage int -sys_sigsuspend(int history0, int history1, old_sigset_t mask) +SYSCALL_DEFINE3(sigsuspend, int, history0, int, history1, old_sigset_t, mask) { mask &= _BLOCKABLE; spin_lock_irq(&current->sighand->siglock); @@ -70,9 +70,8 @@ sys_sigsuspend(int history0, int history1, old_sigset_t mask) return -ERESTARTNOHAND; } -asmlinkage long -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) +SYSCALL_DEFINE3(sigaction, int, sig, const struct old_sigaction __user *, act, + struct old_sigaction __user *, oact) { struct k_sigaction new_ka, old_ka; int ret; @@ -102,15 +101,13 @@ sys_sigaction(int sig, const struct old_sigaction __user *act, return ret; } -asmlinkage long -sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss) +SYSCALL_DEFINE2(sigaltstack, const stack_t __user *, uss, + stack_t __user *, uoss) { struct pt_regs *regs = task_pt_regs(current); return do_sigaltstack(uss, uoss, regs->gprs[15]); } - - /* Returns non-zero on fault. 
*/ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs) { @@ -164,7 +161,7 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs) return 0; } -asmlinkage long sys_sigreturn(void) +SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe __user *frame = (sigframe __user *)regs->gprs[15]; @@ -191,7 +188,7 @@ badframe: return 0; } -asmlinkage long sys_rt_sigreturn(void) +SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15]; diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c index c34be4568b8..c7ae4b17e0e 100644 --- a/arch/s390/kernel/sys_s390.c +++ b/arch/s390/kernel/sys_s390.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "entry.h" @@ -74,7 +75,7 @@ struct mmap_arg_struct { unsigned long offset; }; -asmlinkage long sys_mmap2(struct mmap_arg_struct __user *arg) +SYSCALL_DEFINE1(mmap2, struct mmap_arg_struct __user *, arg) { struct mmap_arg_struct a; int error = -EFAULT; @@ -86,7 +87,7 @@ out: return error; } -asmlinkage long old_mmap(struct mmap_arg_struct __user *arg) +SYSCALL_DEFINE1(s390_old_mmap, struct mmap_arg_struct __user *, arg) { struct mmap_arg_struct a; long error = -EFAULT; @@ -108,8 +109,8 @@ out: * * This is really horribly ugly. 
*/ -asmlinkage long sys_ipc(uint call, int first, unsigned long second, - unsigned long third, void __user *ptr) +SYSCALL_DEFINE5(ipc, uint, call, int, first, unsigned long, second, + unsigned long, third, void __user *, ptr) { struct ipc_kludge tmp; int ret; @@ -175,7 +176,7 @@ asmlinkage long sys_ipc(uint call, int first, unsigned long second, } #ifdef CONFIG_64BIT -asmlinkage long s390x_newuname(struct new_utsname __user *name) +SYSCALL_DEFINE1(s390_newuname, struct new_utsname __user *, name) { int ret = sys_newuname(name); @@ -186,7 +187,7 @@ asmlinkage long s390x_newuname(struct new_utsname __user *name) return ret; } -asmlinkage long s390x_personality(unsigned long personality) +SYSCALL_DEFINE1(s390_personality, unsigned long, personality) { int ret; @@ -205,15 +206,13 @@ asmlinkage long s390x_personality(unsigned long personality) */ #ifndef CONFIG_64BIT -asmlinkage long -s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice) +SYSCALL_DEFINE5(s390_fadvise64, int, fd, u32, offset_high, u32, offset_low, + size_t, len, int, advice) { return sys_fadvise64(fd, (u64) offset_high << 32 | offset_low, len, advice); } -#endif - struct fadvise64_64_args { int fd; long long offset; @@ -221,8 +220,7 @@ struct fadvise64_64_args { int advice; }; -asmlinkage long -s390_fadvise64_64(struct fadvise64_64_args __user *args) +SYSCALL_DEFINE1(s390_fadvise64_64, struct fadvise64_64_args __user *, args) { struct fadvise64_64_args a; @@ -231,7 +229,6 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args) return sys_fadvise64_64(a.fd, a.offset, a.len, a.advice); } -#ifndef CONFIG_64BIT /* * This is a wrapper to call sys_fallocate(). For 31 bit s390 the last * 64 bit argument "len" is split into the upper and lower 32 bits. 
The @@ -244,9 +241,19 @@ s390_fadvise64_64(struct fadvise64_64_args __user *args) * to * %r2: fd, %r3: mode, %r4/%r5: offset, 96(%r15)-103(%r15): len */ -asmlinkage long s390_fallocate(int fd, int mode, loff_t offset, +SYSCALL_DEFINE(s390_fallocate)(int fd, int mode, loff_t offset, u32 len_high, u32 len_low) { return sys_fallocate(fd, mode, offset, ((u64)len_high << 32) | len_low); } +#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS +asmlinkage long SyS_s390_fallocate(long fd, long mode, loff_t offset, + long len_high, long len_low) +{ + return SYSC_s390_fallocate((int) fd, (int) mode, offset, + (u32) len_high, (u32) len_low); +} +SYSCALL_ALIAS(sys_s390_fallocate, SyS_s390_fallocate); +#endif + #endif diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 2d61787949d..76d16e0140b 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -98,7 +98,7 @@ SYSCALL(sys_uselib,sys_uselib,sys32_uselib_wrapper) SYSCALL(sys_swapon,sys_swapon,sys32_swapon_wrapper) SYSCALL(sys_reboot,sys_reboot,sys32_reboot_wrapper) SYSCALL(sys_ni_syscall,sys_ni_syscall,old32_readdir_wrapper) /* old readdir syscall */ -SYSCALL(old_mmap,old_mmap,old32_mmap_wrapper) /* 90 */ +SYSCALL(sys_s390_old_mmap,sys_s390_old_mmap,old32_mmap_wrapper) /* 90 */ SYSCALL(sys_munmap,sys_munmap,sys32_munmap_wrapper) SYSCALL(sys_truncate,sys_truncate,sys32_truncate_wrapper) SYSCALL(sys_ftruncate,sys_ftruncate,sys32_ftruncate_wrapper) @@ -130,7 +130,7 @@ SYSCALL(sys_fsync,sys_fsync,sys32_fsync_wrapper) SYSCALL(sys_sigreturn,sys_sigreturn,sys32_sigreturn) SYSCALL(sys_clone,sys_clone,sys32_clone) /* 120 */ SYSCALL(sys_setdomainname,sys_setdomainname,sys32_setdomainname_wrapper) -SYSCALL(sys_newuname,s390x_newuname,sys32_newuname_wrapper) +SYSCALL(sys_newuname,sys_s390_newuname,sys32_newuname_wrapper) NI_SYSCALL /* modify_ldt for i386 */ SYSCALL(sys_adjtimex,sys_adjtimex,compat_sys_adjtimex_wrapper) SYSCALL(sys_mprotect,sys_mprotect,sys32_mprotect_wrapper) /* 125 */ @@ -144,7 +144,7 @@ 
SYSCALL(sys_getpgid,sys_getpgid,sys32_getpgid_wrapper) SYSCALL(sys_fchdir,sys_fchdir,sys32_fchdir_wrapper) SYSCALL(sys_bdflush,sys_bdflush,sys32_bdflush_wrapper) SYSCALL(sys_sysfs,sys_sysfs,sys32_sysfs_wrapper) /* 135 */ -SYSCALL(sys_personality,s390x_personality,sys32_personality_wrapper) +SYSCALL(sys_personality,sys_s390_personality,sys32_personality_wrapper) NI_SYSCALL /* for afs_syscall */ SYSCALL(sys_setfsuid16,sys_ni_syscall,sys32_setfsuid16_wrapper) /* old setfsuid16 syscall */ SYSCALL(sys_setfsgid16,sys_ni_syscall,sys32_setfsgid16_wrapper) /* old setfsgid16 syscall */ @@ -261,7 +261,7 @@ SYSCALL(sys_epoll_create,sys_epoll_create,sys_epoll_create_wrapper) SYSCALL(sys_epoll_ctl,sys_epoll_ctl,sys_epoll_ctl_wrapper) /* 250 */ SYSCALL(sys_epoll_wait,sys_epoll_wait,sys_epoll_wait_wrapper) SYSCALL(sys_set_tid_address,sys_set_tid_address,sys32_set_tid_address_wrapper) -SYSCALL(s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) +SYSCALL(sys_s390_fadvise64,sys_fadvise64_64,sys32_fadvise64_wrapper) SYSCALL(sys_timer_create,sys_timer_create,sys32_timer_create_wrapper) SYSCALL(sys_timer_settime,sys_timer_settime,sys32_timer_settime_wrapper) /* 255 */ SYSCALL(sys_timer_gettime,sys_timer_gettime,sys32_timer_gettime_wrapper) @@ -272,7 +272,7 @@ SYSCALL(sys_clock_gettime,sys_clock_gettime,sys32_clock_gettime_wrapper) /* 260 SYSCALL(sys_clock_getres,sys_clock_getres,sys32_clock_getres_wrapper) SYSCALL(sys_clock_nanosleep,sys_clock_nanosleep,sys32_clock_nanosleep_wrapper) NI_SYSCALL /* reserved for vserver */ -SYSCALL(s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) +SYSCALL(sys_s390_fadvise64_64,sys_ni_syscall,sys32_fadvise64_64_wrapper) SYSCALL(sys_statfs64,sys_statfs64,compat_sys_statfs64_wrapper) SYSCALL(sys_fstatfs64,sys_fstatfs64,compat_sys_fstatfs64_wrapper) SYSCALL(sys_remap_file_pages,sys_remap_file_pages,sys32_remap_file_pages_wrapper) @@ -322,7 +322,7 @@ NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) 
SYSCALL(sys_epoll_pwait,sys_epoll_pwait,compat_sys_epoll_pwait_wrapper) SYSCALL(sys_utimes,sys_utimes,compat_sys_utimes_wrapper) -SYSCALL(s390_fallocate,sys_fallocate,sys_fallocate_wrapper) +SYSCALL(sys_s390_fallocate,sys_fallocate,sys_fallocate_wrapper) SYSCALL(sys_utimensat,sys_utimensat,compat_sys_utimensat_wrapper) /* 315 */ SYSCALL(sys_signalfd,sys_signalfd,compat_sys_signalfd_wrapper) NI_SYSCALL /* 317 old sys_timer_fd */ -- cgit From 2465fb6605b4f8f3964b132017bf4078d1265fe9 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jan 2009 15:58:55 +0100 Subject: ALSA: hda - Fix missing initialization of NID 0x0e for STAC925x The selector widget 0x0e isn't initialized properly in the whole probe process, thus it can be a wrong value depending on the BIOS setup. This patch adds the init verb to set it to the max & unmuted. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 25230f4c888..6d9a4a2aa4a 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -884,6 +884,8 @@ static struct hda_verb stac92hd71bxx_analog_core_init[] = { static struct hda_verb stac925x_core_init[] = { /* set dac0mux for dac converter */ { 0x06, AC_VERB_SET_CONNECT_SEL, 0x00}, + /* unmute and set max the selector */ + { 0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb07f }, {} }; -- cgit From 18e6959c385f3edf3991fa6662a53dac4eb10d5b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 14 Jan 2009 07:28:16 +0100 Subject: mm: fix assertion This assertion is incorrect for lockless pagecache. By definition if we have an unpinned page that we are trying to take a speculative reference to, it may become the tail of a compound page at any time (if it is freed, then reallocated as a compound page). It was still a valid assertion for the vmscan.c LRU isolation case, but it doesn't seem incredibly helpful... 
if somebody wants it, they can put it back directly where it applies in the vmscan code. Signed-off-by: Nick Piggin Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index b91a73fd1bc..e8ddc98b840 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -260,7 +260,6 @@ static inline int put_page_testzero(struct page *page) */ static inline int get_page_unless_zero(struct page *page) { - VM_BUG_ON(PageTail(page)); return atomic_inc_not_zero(&page->_count); } -- cgit From 9316fcacb89c59fe556c48587ac02cd7f5d38045 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Jan 2009 09:35:44 -0800 Subject: kernel/up.c: omit it if SMP=y, USE_GENERIC_SMP_HELPERS=n Fix the sparc build - we were including `up.o' on SMP builds, when CONFIG_USE_GENERIC_SMP_HELPERS=n. Tested-by: Robert Reif Fixed-by: Robert Reif Cc: David Miller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/Makefile | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/Makefile b/kernel/Makefile index 2aebc4cd787..170a9213c1b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -40,9 +40,8 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o -ifeq ($(CONFIG_USE_GENERIC_SMP_HELPERS),y) -obj-y += smp.o -else +obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o +ifneq ($(CONFIG_SMP),y) obj-y += up.o endif obj-$(CONFIG_SMP) += spinlock.o -- cgit From 9b896033aa2781d36b2d3f756fe70325fc8487e2 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:02 +0100 Subject: ide: fix accidental LOCKDEP breakage caused by local_irq_set() removal commit 54cc1428cfa619e16d75baae8cb041a2eff015f0 ("ide: remove local_irq_set() macro") accidentally replaced local_save_flags() by local_irq_set() in ide_probe_port() and 
__ide_wait_stat() which resulted in LOCKDEP breakage. Reported-by: Larry Finger Tested-by: Larry Finger Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-iops.c | 2 +- drivers/ide/ide-probe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c index e728cfe7273..753b92ebe0a 100644 --- a/drivers/ide/ide-iops.c +++ b/drivers/ide/ide-iops.c @@ -493,7 +493,7 @@ static int __ide_wait_stat(ide_drive_t *drive, u8 good, u8 bad, unsigned long ti stat = tp_ops->read_status(hwif); if (stat & ATA_BUSY) { - local_irq_save(flags); + local_save_flags(flags); local_irq_enable_in_hardirq(); timeout += jiffies; while ((stat = tp_ops->read_status(hwif)) & ATA_BUSY) { diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 0ccbb4459fb..312127ea443 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -796,7 +796,7 @@ static int ide_probe_port(ide_hwif_t *hwif) if (irqd) disable_irq(hwif->irq); - local_irq_save(flags); + local_save_flags(flags); local_irq_enable_in_hardirq(); if (ide_port_wait_ready(hwif) == -EBUSY) -- cgit From b94b898f3107046b5c97c556e23529283ea5eadd Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Wed, 14 Jan 2009 19:19:02 +0100 Subject: it821x: Add ultra_mask quirk for Vortex86SX On Vortex86SX with IDE controller revision 0x11 ultra DMA must be disabled. This patch was tested by DMP and seems to work. 
It is a cleaned up version of their older Kernel patch: http://www.dmp.com.tw/tech/vortex86sx/patch-2.6.24-DMP.gz Tested-by: Shawn Lin Signed-off-by: Brandon Philips Cc: Alan Cox Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/it821x.c | 12 ++++++++++++ include/linux/pci_ids.h | 1 + 2 files changed, 13 insertions(+) diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 0be27ac1f07..983440a9a5f 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -68,6 +68,8 @@ #define DRV_NAME "it821x" +#define QUIRK_VORTEX86 1 + struct it821x_dev { unsigned int smart:1, /* Are we in smart raid mode */ @@ -79,6 +81,7 @@ struct it821x_dev u16 pio[2]; /* Cached PIO values */ u16 mwdma[2]; /* Cached MWDMA values */ u16 udma[2]; /* Cached UDMA values (per drive) */ + u16 quirks; }; #define ATA_66 0 @@ -577,6 +580,12 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) hwif->ultra_mask = ATA_UDMA6; hwif->mwdma_mask = ATA_MWDMA2; + + /* Vortex86SX quirk: prevent Ultra-DMA mode to fix BadCRC issue */ + if (idev->quirks & QUIRK_VORTEX86) { + if (dev->revision == 0x11) + hwif->ultra_mask = 0; + } } static void it8212_disable_raid(struct pci_dev *dev) @@ -649,6 +658,8 @@ static int __devinit it821x_init_one(struct pci_dev *dev, const struct pci_devic return -ENOMEM; } + itdevs->quirks = id->driver_data; + rc = ide_pci_init_one(dev, &it821x_chipset, itdevs); if (rc) kfree(itdevs); @@ -668,6 +679,7 @@ static void __devexit it821x_remove(struct pci_dev *dev) static const struct pci_device_id it821x_pci_tbl[] = { { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8211), 0 }, { PCI_VDEVICE(ITE, PCI_DEVICE_ID_ITE_8212), 0 }, + { PCI_VDEVICE(RDC, PCI_DEVICE_ID_RDC_D1010), QUIRK_VORTEX86 }, { 0, }, }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d543365518a..d56ad9c21c0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2174,6 +2174,7 @@ #define PCI_DEVICE_ID_RDC_R6040 0x6040 #define PCI_DEVICE_ID_RDC_R6060 0x6060 #define 
PCI_DEVICE_ID_RDC_R6061 0x6061 +#define PCI_DEVICE_ID_RDC_D1010 0x1010 #define PCI_VENDOR_ID_LENOVO 0x17aa -- cgit From 4a2462693b37da1438f53f3fbfc75e081659e975 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Wed, 14 Jan 2009 19:19:03 +0100 Subject: it821x.c: use dev->revision instead of pci_read_config_byte Minor cleanup. Signed-off-by: Brandon Philips Cc: Alan Cox Cc: Shawn Lin Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/it821x.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ide/it821x.c b/drivers/ide/it821x.c index 983440a9a5f..e1c4f543739 100644 --- a/drivers/ide/it821x.c +++ b/drivers/ide/it821x.c @@ -560,8 +560,7 @@ static void __devinit init_hwif_it821x(ide_hwif_t *hwif) * this is necessary. */ - pci_read_config_byte(dev, 0x08, &conf); - if (conf == 0x10) { + if (dev->revision == 0x10) { idev->timing10 = 1; hwif->host_flags |= IDE_HFLAG_NO_ATAPI_DMA; if (idev->smart == 0) -- cgit From 655772cc79c94ebf282e1df4d3c3375f05a565ae Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:03 +0100 Subject: ide: can't use SSD/non-rotational queue flag for all CFA devices Some rotating disks also present themselves as CFA devices. 
Reported-by: Alan Cox Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-disk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 4088a622873..806760d24ce 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -633,7 +633,7 @@ static void ide_disk_setup(ide_drive_t *drive) printk(KERN_INFO "%s: max request size: %dKiB\n", drive->name, q->max_sectors / 2); - if (ata_id_is_ssd(id) || ata_id_is_cfa(id)) + if (ata_id_is_ssd(id)) queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); /* calculate drive capacity, and select LBA if possible */ -- cgit From 4180e8334cf8301cf37131bc5d69c0cd724682cb Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:03 +0100 Subject: via82cxxx: fix cable warning message Remove reference to the removed old-style kernel parameter. Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/via82cxxx.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/ide/via82cxxx.c b/drivers/ide/via82cxxx.c index fecc0e03c3f..703c3eeb20a 100644 --- a/drivers/ide/via82cxxx.c +++ b/drivers/ide/via82cxxx.c @@ -432,8 +432,6 @@ static int __devinit via_init_one(struct pci_dev *dev, const struct pci_device_i if (via_clock < 20000 || via_clock > 50000) { printk(KERN_WARNING DRV_NAME ": User given PCI clock speed " "impossible (%d), using 33 MHz instead.\n", via_clock); - printk(KERN_WARNING DRV_NAME ": Use ide0=ata66 if you want " - "to assume 80-wire cable.\n"); via_clock = 33333; } -- cgit From cadb300a4254b1f28bce84e280b7d46c525f73c6 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:03 +0100 Subject: sl82c105: remove dead code CONFIG_LOPEC and CONFIG_SANDPOINT config options are gone. 
Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/sl82c105.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/ide/sl82c105.c b/drivers/ide/sl82c105.c index 48cc748c504..6297956507c 100644 --- a/drivers/ide/sl82c105.c +++ b/drivers/ide/sl82c105.c @@ -310,10 +310,6 @@ static const struct ide_port_info sl82c105_chipset __devinitdata = { .dma_ops = &sl82c105_dma_ops, .host_flags = IDE_HFLAG_IO_32BIT | IDE_HFLAG_UNMASK_IRQS | -/* FIXME: check for Compatibility mode in generic IDE PCI code */ -#if defined(CONFIG_LOPEC) || defined(CONFIG_SANDPOINT) - IDE_HFLAG_FORCE_LEGACY_IRQS | -#endif IDE_HFLAG_SERIALIZE_DMA | IDE_HFLAG_NO_AUTODMA, .pio_mask = ATA_PIO5, -- cgit From e86c1451d3138b4cd0378282b30397d171fa4252 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:03 +0100 Subject: ide: remove unused CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- Documentation/mips/AU1xxx_IDE.README | 6 +----- drivers/ide/Kconfig | 5 ----- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/Documentation/mips/AU1xxx_IDE.README b/Documentation/mips/AU1xxx_IDE.README index f54962aea84..8ace35ebdcd 100644 --- a/Documentation/mips/AU1xxx_IDE.README +++ b/Documentation/mips/AU1xxx_IDE.README @@ -52,14 +52,12 @@ Two files are introduced: b) 'drivers/ide/mips/au1xxx-ide.c' contains the functionality of the AU1XXX IDE driver -Four configs variables are introduced: +Following extra configs variables are introduced: CONFIG_BLK_DEV_IDE_AU1XXX_PIO_DBDMA - enable the PIO+DBDMA mode CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA - enable the MWDMA mode CONFIG_BLK_DEV_IDE_AU1XXX_BURSTABLE_ON - set Burstable FIFO in DBDMA controller - CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - maximum transfer size - per descriptor SUPPORTED IDE MODES @@ -87,7 +85,6 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y CONFIG_IDEDMA_PCI_AUTO=y CONFIG_BLK_DEV_IDE_AU1XXX=y 
CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y -CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ=128 CONFIG_BLK_DEV_IDEDMA=y CONFIG_IDEDMA_AUTO=y @@ -105,7 +102,6 @@ CONFIG_BLK_DEV_IDEDMA_PCI=y CONFIG_IDEDMA_PCI_AUTO=y CONFIG_BLK_DEV_IDE_AU1XXX=y CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA=y -CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ=128 CONFIG_BLK_DEV_IDEDMA=y CONFIG_IDEDMA_AUTO=y diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 3f9503867e6..b1c6f68d98c 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -701,11 +701,6 @@ config BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA depends on SOC_AU1200 && BLK_DEV_IDE_AU1XXX endchoice -config BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - int "Maximum transfer size (KB) per request (up to 128)" - default "128" - depends on BLK_DEV_IDE_AU1XXX - config BLK_DEV_IDE_TX4938 tristate "TX4938 internal IDE support" depends on SOC_TX4938 -- cgit From a294d97742568f429590cf2022d92e4b0c5f6ba0 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Wed, 14 Jan 2009 19:19:04 +0100 Subject: tx4938ide: Fix build error due to read_sff_dma_status moving Signed-off-by: Atsushi Nemoto Acked-by: Sergei Shtylyov Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/tx4938ide.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/ide/tx4938ide.c b/drivers/ide/tx4938ide.c index b4ef218072c..d9095345f7c 100644 --- a/drivers/ide/tx4938ide.c +++ b/drivers/ide/tx4938ide.c @@ -202,7 +202,6 @@ static const struct ide_tp_ops tx4938ide_tp_ops = { .exec_command = ide_exec_command, .read_status = ide_read_status, .read_altstatus = ide_read_altstatus, - .read_sff_dma_status = ide_read_sff_dma_status, .set_irq = ide_set_irq, -- cgit From 2ea5521022ac8f4f528dcbae02668e02a3501a5a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 14 Jan 2009 19:19:04 +0100 Subject: ide: fix suspend regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Monday 12 January 2009, Simon Holm Thøgersen wrote: > commit 295f000 ("ide: don't 
execute the next queued command from the > hard-IRQ context (v2)") breaks suspend to disk for me. On > 'echo disk > /sys/power/state' the systems hangs, letting me switch > virtual consoles, but not responding to Alt+SysRq Restart the request queue early for REQ_TYPE_PM_RESUME requests (though there is only one resume request for the whole resume sequence it stays in the queue until is fully completed and now depends on kblockd for processing consequential resume states). Reported-and-bisected-by: Simon Holm Thøgersen Tested-by: Simon Holm Thøgersen Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-pm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 4b3bf6a06b7..60538d9c84e 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -186,12 +186,10 @@ void ide_complete_pm_request(ide_drive_t *drive, struct request *rq) blk_pm_suspend_request(rq) ? "suspend" : "resume"); #endif spin_lock_irqsave(q->queue_lock, flags); - if (blk_pm_suspend_request(rq)) { + if (blk_pm_suspend_request(rq)) blk_stop_queue(q); - } else { + else drive->dev_flags &= ~IDE_DFLAG_BLOCKED; - blk_start_queue(q); - } spin_unlock_irqrestore(q->queue_lock, flags); drive->hwif->rq = NULL; @@ -219,6 +217,8 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) * point. 
*/ ide_hwif_t *hwif = drive->hwif; + struct request_queue *q = drive->queue; + unsigned long flags; int rc; #ifdef DEBUG_PM printk("%s: Wakeup request inited, waiting for !BSY...\n", drive->name); @@ -231,5 +231,9 @@ void ide_check_pm_state(ide_drive_t *drive, struct request *rq) rc = ide_wait_not_busy(hwif, 100000); if (rc) printk(KERN_WARNING "%s: drive not ready on wakeup\n", drive->name); + + spin_lock_irqsave(q->queue_lock, flags); + blk_start_queue(q); + spin_unlock_irqrestore(q->queue_lock, flags); } } -- cgit From e720b9e498b6bbb1b4f3b3d2f8e9a78578aafef7 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Wed, 14 Jan 2009 19:19:04 +0100 Subject: IDE: fix sparse signed-ness errors with host->host_busy The host_busy field in struct ide_host defaults to a signed-long, where most arch's test_and_set_bit_* macros use an unsigned long. Change to using an unsigned long, which on ARM removes the following sparse errors: drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:681:8: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:681:8: expected unsigned long volatile *p drivers/ide/ide-io.c:681:8: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 
(different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * drivers/ide/ide-io.c:695:3: warning: incorrect type in argument 2 (different signedness) drivers/ide/ide-io.c:695:3: expected unsigned long volatile *p drivers/ide/ide-io.c:695:3: got long volatile * Signed-off-by: Ben Dooks Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/ide.h b/include/linux/ide.h index 3644f632338..194da5a4b0d 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -871,7 +871,7 @@ struct ide_host { ide_hwif_t *cur_port; /* for hosts requiring serialization */ /* used for hosts requiring serialization */ - volatile long host_busy; + volatile unsigned long host_busy; }; #define IDE_HOST_BUSY 0 -- cgit From 1b0652eb588e57c3ab230e0291e7da99c7e665e0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 14 Jan 2009 08:27:35 +0100 Subject: ALSA: hda - Fix HP dv5 mic input Fix HP dv5 (103c:3603) built-in mic input. 
Reference: kernel bug 12440 http://bugzilla.kernel.org/show_bug.cgi?id=12440 Signed-off-by: Takashi Iwai Cc: stable@kernel.org --- sound/pci/hda/patch_sigmatel.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 6d9a4a2aa4a..e6c13963f21 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -90,6 +90,7 @@ enum { STAC_DELL_M4_2, STAC_DELL_M4_3, STAC_HP_M4, + STAC_HP_DV5, STAC_92HD71BXX_MODELS }; @@ -1778,6 +1779,7 @@ static unsigned int *stac92hd71bxx_brd_tbl[STAC_92HD71BXX_MODELS] = { [STAC_DELL_M4_2] = dell_m4_2_pin_configs, [STAC_DELL_M4_3] = dell_m4_3_pin_configs, [STAC_HP_M4] = NULL, + [STAC_HP_DV5] = NULL, }; static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = { @@ -1786,6 +1788,7 @@ static const char *stac92hd71bxx_models[STAC_92HD71BXX_MODELS] = { [STAC_DELL_M4_2] = "dell-m4-2", [STAC_DELL_M4_3] = "dell-m4-3", [STAC_HP_M4] = "hp-m4", + [STAC_HP_DV5] = "hp-dv5", }; static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = { @@ -1799,7 +1802,7 @@ static struct snd_pci_quirk stac92hd71bxx_cfg_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x30fc, "HP dv7", STAC_HP_M4), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3603, - "HP dv5", STAC_HP_M4), + "HP dv5", STAC_HP_DV5), SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x361a, "unknown HP", STAC_HP_M4), SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x0233, -- cgit From 483a2b3a3182abcb7fcea986d7ea13e793bb00b1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Jan 2009 14:35:15 -0800 Subject: ARM etherh: Fix build failure. Reported by Russell King: drivers/net/arm/etherh.c:649: error: unknown field 'ndo_set_mac_addr' specified in initializer Signed-off-by: David S. 
Miller --- drivers/net/arm/etherh.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/arm/etherh.c b/drivers/net/arm/etherh.c index 745ac188bab..d15d8b79d8e 100644 --- a/drivers/net/arm/etherh.c +++ b/drivers/net/arm/etherh.c @@ -646,7 +646,7 @@ static const struct net_device_ops etherh_netdev_ops = { .ndo_get_stats = ei_get_stats, .ndo_set_multicast_list = ei_set_multicast_list, .ndo_validate_addr = eth_validate_addr, - .ndo_set_mac_addr = eth_set_mac_addr, + .ndo_set_mac_address = eth_set_mac_addr, .ndo_change_mtu = eth_change_mtu, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = ei_poll, -- cgit From f17f5c91ae3bfeb5cfc37fa132a5fdfceb8927be Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 14:36:12 -0800 Subject: gro: Check for GSO packets and packets with frag_list As GRO cannot be applied to packets with frag_list we need to make sure that we reject such packets if they are fed to us, e.g., through a tunnel device. Also there is no point in applying GRO on GSO packets so they too should be rejected. This allows GRO to be used in virtio-net which may produce GSO packets directly but may still benefit from GRO if the other end of it doesn't support GSO. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index b715a55cccc..7dec715293b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2392,6 +2392,9 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (!(skb->dev->features & NETIF_F_GRO)) goto normal; + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list) + goto normal; + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; -- cgit From f0d44ae310bc8eb0b6694e257015d8b24e1a357c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 14 Jan 2009 14:38:02 -0800 Subject: phylib: Fix Freescale TBI PHY detection Freescale on-chip TBI PHYs report PHY ID as 0x0, but as of commit 3ee82383f0098a2e13acc8cf1be8e47512f41e5a Author: Giulio Benetti Date: Thu Nov 13 21:53:13 2008 +0000 phy: fix phy address bug PHYID returns 0xffff and not 0xffffffff when not found and in some case(at91sam9263) 0x0. Maybe this patch could be useful. phy_device.c treats PHY ID == 0x0 as a bogus ID, and that results in gianfar driver failure to see the TBI PHYs. This code snippet triggers: if (!priv->tbiphy) { printk(KERN_WARNING "SGMII mode requires that the device " "tree specify a tbi-handle\n"); return; } Although tbi-handle is specified in the device tree. Btw, technically PHY ID == 0x0 is a valid ID (if we ever see a PHY manufactured by Xerox :-). Signed-off-by: Anton Vorontsov Acked-by: Andy Fleming Signed-off-by: David S. 
Miller --- drivers/net/phy/phy_device.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index e35460165bf..0a06e4fd37d 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -231,15 +231,6 @@ struct phy_device * get_phy_device(struct mii_bus *bus, int addr) if ((phy_id & 0x1fffffff) == 0x1fffffff) return NULL; - /* - * Broken hardware is sometimes missing the pull-up resistor on the - * MDIO line, which results in reads to non-existent devices returning - * 0 rather than 0xffff. Catch this here and treat 0 as a non-existent - * device as well. - */ - if (phy_id == 0) - return NULL; - dev = phy_device_create(bus, addr, phy_id); return dev; -- cgit From bae584316045011ce3376816585a305d2b9b76f2 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 14 Jan 2009 14:41:00 -0800 Subject: i4l: do not print a warning when shutting down an i4l ppp interface When an i4l ppp interface is shut down (e.g. with /sbin/ifdown ippp0) a scary warning is logged: isdn_free_channel: called with invalid drv(-1) or channel(-1) This warning is caused by isdn_net_unbind_channel(), which always calls isdn_free_channel() even if isdn_net_local->isdn_device and isdn_net_local->isdn_channel are (still) in a perfectly acceptable default state, so let's not do that. Signed-off-by: Paul Bolle Signed-off-by: David S. 
Miller --- drivers/isdn/i4l/isdn_net.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 7c5f97033b9..de6be7181f0 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -292,7 +292,9 @@ isdn_net_unbind_channel(isdn_net_local * lp) lp->dialstate = 0; dev->rx_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL; dev->st_netdev[isdn_dc2minor(lp->isdn_device, lp->isdn_channel)] = NULL; - isdn_free_channel(lp->isdn_device, lp->isdn_channel, ISDN_USAGE_NET); + if (lp->isdn_device != -1 && lp->isdn_channel != -1) + isdn_free_channel(lp->isdn_device, lp->isdn_channel, + ISDN_USAGE_NET); lp->flags &= ~ISDN_NET_CONNECTED; lp->isdn_device = -1; lp->isdn_channel = -1; -- cgit From 5f3e54057c62e5f654c66e4ce1172993f67fc284 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Wed, 14 Jan 2009 14:42:21 -0800 Subject: i4l: minor cleanups Minor cleanups, either made possible or obvious after commit d700555 (I4l: convert to net_device_ops). Signed-off-by: Paul Bolle Signed-off-by: David S. 
Miller --- drivers/isdn/i4l/isdn_net.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index de6be7181f0..cb8943da4f1 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2515,7 +2515,6 @@ static const struct net_device_ops isdn_netdev_ops = { .ndo_stop = isdn_net_close, .ndo_do_ioctl = isdn_net_ioctl, - .ndo_validate_addr = NULL, .ndo_start_xmit = isdn_net_start_xmit, .ndo_get_stats = isdn_net_get_stats, .ndo_tx_timeout = isdn_net_tx_timeout, @@ -2530,12 +2529,8 @@ static void _isdn_setup(struct net_device *dev) ether_setup(dev); - dev->flags = IFF_NOARP | IFF_POINTOPOINT; /* Setup the generic properties */ - dev->mtu = 1500; dev->flags = IFF_NOARP|IFF_POINTOPOINT; - dev->type = ARPHRD_ETHER; - dev->addr_len = ETH_ALEN; dev->header_ops = NULL; dev->netdev_ops = &isdn_netdev_ops; -- cgit From fc8c7dc1b29560c016a67a34ccff32a712b5aa86 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Jan 2009 14:55:35 -0800 Subject: xfrm: For 32/64 compatibility wrt. xfrm_usersa_info Reported by Jiri Klimes. Fix suggested by Patrick McHardy. Signed-off-by: David S. Miller --- net/xfrm/xfrm_user.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b95a2d64eb5..7877e7975da 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1914,10 +1914,17 @@ static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } #endif +/* For the xfrm_usersa_info cases we have to work around some 32-bit vs. + * 64-bit compatibility issues. On 32-bit the structure is 220 bytes, but + * for 64-bit it gets padded out to 224 bytes. Those bytes are just + * padding and don't have any content we care about. Therefore as long + * as we have enough bytes for the content we can make both cases work. 
+ */ + #define XMSGSIZE(type) sizeof(struct type) static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { - [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_NEWSA - XFRM_MSG_BASE] = 220, /* see above */ [XFRM_MSG_DELSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_GETSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_id), [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), @@ -1927,7 +1934,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_ACQUIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_acquire), [XFRM_MSG_EXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_expire), [XFRM_MSG_UPDPOLICY - XFRM_MSG_BASE] = XMSGSIZE(xfrm_userpolicy_info), - [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_info), + [XFRM_MSG_UPDSA - XFRM_MSG_BASE] = 220, /* see above */ [XFRM_MSG_POLEXPIRE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_polexpire), [XFRM_MSG_FLUSHSA - XFRM_MSG_BASE] = XMSGSIZE(xfrm_usersa_flush), [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = 0, -- cgit From aa0effbd1c0269ce7dd13be8dbf13e6b04f30ebd Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 13 Jan 2009 19:31:29 -0800 Subject: cris: introduce asm/swab.h Adjust the arch overrides to the new names as well. 
Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/cris/include/arch-v10/arch/byteorder.h | 26 ------------------------- arch/cris/include/arch-v10/arch/swab.h | 30 +++++++++++++++++++++++++++++ arch/cris/include/arch-v32/arch/byteorder.h | 20 ------------------- arch/cris/include/arch-v32/arch/swab.h | 24 +++++++++++++++++++++++ arch/cris/include/asm/Kbuild | 1 + arch/cris/include/asm/byteorder.h | 20 +------------------ arch/cris/include/asm/swab.h | 8 ++++++++ 7 files changed, 64 insertions(+), 65 deletions(-) delete mode 100644 arch/cris/include/arch-v10/arch/byteorder.h create mode 100644 arch/cris/include/arch-v10/arch/swab.h delete mode 100644 arch/cris/include/arch-v32/arch/byteorder.h create mode 100644 arch/cris/include/arch-v32/arch/swab.h create mode 100644 arch/cris/include/asm/swab.h diff --git a/arch/cris/include/arch-v10/arch/byteorder.h b/arch/cris/include/arch-v10/arch/byteorder.h deleted file mode 100644 index 255b646b7fa..00000000000 --- a/arch/cris/include/arch-v10/arch/byteorder.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef _CRIS_ARCH_BYTEORDER_H -#define _CRIS_ARCH_BYTEORDER_H - -#include -#include - -/* we just define these two (as we can do the swap in a single - * asm instruction in CRIS) and the arch-independent files will put - * them together into ntohl etc. 
- */ - -static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) -{ - __asm__ ("swapwb %0" : "=r" (x) : "0" (x)); - - return(x); -} - -static inline __attribute_const__ __u16 ___arch__swab16(__u16 x) -{ - __asm__ ("swapb %0" : "=r" (x) : "0" (x)); - - return(x); -} - -#endif diff --git a/arch/cris/include/arch-v10/arch/swab.h b/arch/cris/include/arch-v10/arch/swab.h new file mode 100644 index 00000000000..e4e847d8a05 --- /dev/null +++ b/arch/cris/include/arch-v10/arch/swab.h @@ -0,0 +1,30 @@ +#ifndef _CRIS_ARCH_SWAB_H +#define _CRIS_ARCH_SWAB_H + +#include +#include + +#define __SWAB_64_THRU_32__ + +/* we just define these two (as we can do the swap in a single + * asm instruction in CRIS) and the arch-independent files will put + * them together into ntohl etc. + */ + +static inline __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + __asm__ ("swapwb %0" : "=r" (x) : "0" (x)); + + return(x); +} +#define __arch_swab32 __arch_swab32 + +static inline __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + __asm__ ("swapb %0" : "=r" (x) : "0" (x)); + + return(x); +} +#define __arch_swab16 __arch_swab16 + +#endif diff --git a/arch/cris/include/arch-v32/arch/byteorder.h b/arch/cris/include/arch-v32/arch/byteorder.h deleted file mode 100644 index 6ef8fb4a35f..00000000000 --- a/arch/cris/include/arch-v32/arch/byteorder.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef _ASM_CRIS_ARCH_BYTEORDER_H -#define _ASM_CRIS_ARCH_BYTEORDER_H - -#include - -static inline __const__ __u32 -___arch__swab32(__u32 x) -{ - __asm__ __volatile__ ("swapwb %0" : "=r" (x) : "0" (x)); - return (x); -} - -static inline __const__ __u16 -___arch__swab16(__u16 x) -{ - __asm__ __volatile__ ("swapb %0" : "=r" (x) : "0" (x)); - return (x); -} - -#endif /* _ASM_CRIS_ARCH_BYTEORDER_H */ diff --git a/arch/cris/include/arch-v32/arch/swab.h b/arch/cris/include/arch-v32/arch/swab.h new file mode 100644 index 00000000000..9a4ea5e209c --- /dev/null +++ b/arch/cris/include/arch-v32/arch/swab.h @@ -0,0 +1,24 @@ 
+#ifndef _ASM_CRIS_ARCH_SWAB_H +#define _ASM_CRIS_ARCH_SWAB_H + +#include + +#define __SWAB_64_THRU_32__ + +static inline __const__ __u32 +__arch_swab32(__u32 x) +{ + __asm__ __volatile__ ("swapwb %0" : "=r" (x) : "0" (x)); + return (x); +} +#define __arch_swab32 __arch_swab32 + +static inline __const__ __u16 +__arch_swab16(__u16 x) +{ + __asm__ __volatile__ ("swapb %0" : "=r" (x) : "0" (x)); + return (x); +} +#define __arch_swab16 __arch_swab16 + +#endif /* _ASM_CRIS_ARCH_SWAB_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index d5b631935ec..b79b7c6543a 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -9,3 +9,4 @@ header-y += sync_serial.h unifdef-y += etraxgpio.h unifdef-y += rs485.h +unifdef-y += swab.h diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/asm/byteorder.h index cc8e418cfd1..7678d86317a 100644 --- a/arch/cris/include/asm/byteorder.h +++ b/arch/cris/include/asm/byteorder.h @@ -1,25 +1,7 @@ #ifndef _CRIS_BYTEORDER_H #define _CRIS_BYTEORDER_H -#ifdef __GNUC__ - -#ifdef __KERNEL__ -#include - -/* defines are necessary because the other files detect the presence - * of a defined __arch_swab32, not an inline - */ -#define __arch__swab32(x) ___arch__swab32(x) -#define __arch__swab16(x) ___arch__swab16(x) -#endif /* __KERNEL__ */ - -#if !defined(__STRICT_ANSI__) || defined(__KERNEL__) -# define __BYTEORDER_HAS_U64__ -# define __SWAB_64_THRU_32__ -#endif - -#endif /* __GNUC__ */ - +#include #include #endif diff --git a/arch/cris/include/asm/swab.h b/arch/cris/include/asm/swab.h new file mode 100644 index 00000000000..80668e88419 --- /dev/null +++ b/arch/cris/include/asm/swab.h @@ -0,0 +1,8 @@ +#ifndef _CRIS_SWAB_H +#define _CRIS_SWAB_H + +#ifdef __KERNEL__ +#include +#endif /* __KERNEL__ */ + +#endif /* _CRIS_SWAB_H */ -- cgit From 74d96f018673759d04d032c137d132f6447bfb1e Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Tue, 13 Jan 2009 19:27:09 -0800 Subject: byteorder: make 
swab.h include asm/swab.h like a regular header Add swab.h to kbuild.asm and remove the individual entries from each arch, mark as unifdef as some arches have some kernel-only bits inside. Signed-off-by: Harvey Harrison Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/Kbuild | 1 - arch/alpha/include/asm/byteorder.h | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm/include/asm/byteorder.h | 2 -- arch/avr32/include/asm/Kbuild | 1 - arch/avr32/include/asm/byteorder.h | 1 - arch/blackfin/include/asm/Kbuild | 1 - arch/blackfin/include/asm/byteorder.h | 1 - arch/cris/include/asm/Kbuild | 1 - arch/cris/include/asm/byteorder.h | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/h8300/include/asm/byteorder.h | 1 - arch/ia64/include/asm/Kbuild | 1 - arch/ia64/include/asm/byteorder.h | 1 - arch/m68knommu/include/asm/Kbuild | 2 -- arch/m68knommu/include/asm/byteorder.h | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mips/include/asm/byteorder.h | 2 -- arch/parisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/byteorder.h | 1 - arch/powerpc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/byteorder.h | 2 -- arch/s390/include/asm/Kbuild | 1 - arch/s390/include/asm/byteorder.h | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sh/include/asm/byteorder.h | 2 -- arch/sparc/include/asm/Kbuild | 1 - arch/sparc/include/asm/byteorder.h | 1 - arch/x86/include/asm/Kbuild | 1 - arch/x86/include/asm/byteorder.h | 1 - arch/xtensa/include/asm/Kbuild | 2 -- arch/xtensa/include/asm/byteorder.h | 2 -- include/asm-frv/Kbuild | 1 - include/asm-frv/byteorder.h | 1 - include/asm-generic/Kbuild.asm | 1 + include/asm-m32r/Kbuild | 1 - include/asm-m32r/byteorder.h | 2 -- include/asm-m68k/Kbuild | 1 - include/asm-m68k/byteorder.h | 1 - include/asm-mn10300/Kbuild | 1 - include/asm-mn10300/byteorder.h | 1 - include/linux/swab.h | 2 +- 42 files changed, 2 insertions(+), 49 deletions(-) diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 4dad2736057..b7c8f188b31 100644 --- 
a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -9,4 +9,3 @@ unifdef-y += console.h unifdef-y += fpu.h unifdef-y += sysinfo.h unifdef-y += compiler.h -unifdef-y += swab.h diff --git a/arch/alpha/include/asm/byteorder.h b/arch/alpha/include/asm/byteorder.h index 6772f316870..73683093202 100644 --- a/arch/alpha/include/asm/byteorder.h +++ b/arch/alpha/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ALPHA_BYTEORDER_H #define _ALPHA_BYTEORDER_H -#include #include #endif /* _ALPHA_BYTEORDER_H */ diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 43b0b2ba392..73237bd130a 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y += hwcap.h -unifdef-y += swab.h diff --git a/arch/arm/include/asm/byteorder.h b/arch/arm/include/asm/byteorder.h index c02b6fc28e1..77379748b17 100644 --- a/arch/arm/include/asm/byteorder.h +++ b/arch/arm/include/asm/byteorder.h @@ -15,8 +15,6 @@ #ifndef __ASM_ARM_BYTEORDER_H #define __ASM_ARM_BYTEORDER_H -#include - #ifdef __ARMEB__ #include #else diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 219822c8ad1..3136628ba8d 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm -header-y += swab.h header-y += cachectl.h diff --git a/arch/avr32/include/asm/byteorder.h b/arch/avr32/include/asm/byteorder.h index 2aba64b4e12..50abc21619a 100644 --- a/arch/avr32/include/asm/byteorder.h +++ b/arch/avr32/include/asm/byteorder.h @@ -4,7 +4,6 @@ #ifndef __ASM_AVR32_BYTEORDER_H #define __ASM_AVR32_BYTEORDER_H -#include #include #endif /* __ASM_AVR32_BYTEORDER_H */ diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index d0d1ac43554..606ecfdcc96 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y 
+= fixed_code.h -unifdef-y += swab.h diff --git a/arch/blackfin/include/asm/byteorder.h b/arch/blackfin/include/asm/byteorder.h index b9e797a497b..3e69106a4d3 100644 --- a/arch/blackfin/include/asm/byteorder.h +++ b/arch/blackfin/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _BLACKFIN_BYTEORDER_H #define _BLACKFIN_BYTEORDER_H -#include #include #endif /* _BLACKFIN_BYTEORDER_H */ diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index b79b7c6543a..d5b631935ec 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -9,4 +9,3 @@ header-y += sync_serial.h unifdef-y += etraxgpio.h unifdef-y += rs485.h -unifdef-y += swab.h diff --git a/arch/cris/include/asm/byteorder.h b/arch/cris/include/asm/byteorder.h index 7678d86317a..bcd189798e2 100644 --- a/arch/cris/include/asm/byteorder.h +++ b/arch/cris/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _CRIS_BYTEORDER_H #define _CRIS_BYTEORDER_H -#include #include #endif diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 27b108a86b3..c68e1680da0 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h index c36b80a3dd8..13539da99ef 100644 --- a/arch/h8300/include/asm/byteorder.h +++ b/arch/h8300/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _H8300_BYTEORDER_H #define _H8300_BYTEORDER_H -#include #include #endif /* _H8300_BYTEORDER_H */ diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 3b25bd9dca9..ccbe8ae47a6 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -14,4 +14,3 @@ unifdef-y += gcc_intrin.h unifdef-y += intrinsics.h unifdef-y += perfmon.h unifdef-y += ustack.h -unifdef-y += swab.h diff --git a/arch/ia64/include/asm/byteorder.h b/arch/ia64/include/asm/byteorder.h index 0f84c5cb703..a8dd7355815 100644 --- 
a/arch/ia64/include/asm/byteorder.h +++ b/arch/ia64/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_IA64_BYTEORDER_H #define _ASM_IA64_BYTEORDER_H -#include #include #endif /* _ASM_IA64_BYTEORDER_H */ diff --git a/arch/m68knommu/include/asm/Kbuild b/arch/m68knommu/include/asm/Kbuild index 58c02a45413..c68e1680da0 100644 --- a/arch/m68knommu/include/asm/Kbuild +++ b/arch/m68knommu/include/asm/Kbuild @@ -1,3 +1 @@ include include/asm-generic/Kbuild.asm - -unifdef-y += swab.h diff --git a/arch/m68knommu/include/asm/byteorder.h b/arch/m68knommu/include/asm/byteorder.h index a6f0b8f7f62..9c6c76a1504 100644 --- a/arch/m68knommu/include/asm/byteorder.h +++ b/arch/m68knommu/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _M68KNOMMU_BYTEORDER_H #define _M68KNOMMU_BYTEORDER_H -#include #include #endif /* _M68KNOMMU_BYTEORDER_H */ diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 023866c0c10..7897f05e316 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h sgidefs.h sysmips.h -header-y += swab.h diff --git a/arch/mips/include/asm/byteorder.h b/arch/mips/include/asm/byteorder.h index 607b7183070..9579051ff1c 100644 --- a/arch/mips/include/asm/byteorder.h +++ b/arch/mips/include/asm/byteorder.h @@ -8,8 +8,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include - #if defined(__MIPSEB__) #include #elif defined(__MIPSEL__) diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 2121d99f836..f88b252e419 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -1,4 +1,3 @@ include include/asm-generic/Kbuild.asm unifdef-y += pdc.h -unifdef-y += swab.h diff --git a/arch/parisc/include/asm/byteorder.h b/arch/parisc/include/asm/byteorder.h index da66029c4cb..58af2c5f5d6 100644 --- a/arch/parisc/include/asm/byteorder.h +++ b/arch/parisc/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef 
_PARISC_BYTEORDER_H #define _PARISC_BYTEORDER_H -#include #include #endif /* _PARISC_BYTEORDER_H */ diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 9268602de5d..5ab7d7fe198 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -35,4 +35,3 @@ unifdef-y += spu_info.h unifdef-y += termios.h unifdef-y += types.h unifdef-y += unistd.h -unifdef-y += swab.h diff --git a/arch/powerpc/include/asm/byteorder.h b/arch/powerpc/include/asm/byteorder.h index 5cca27a4153..aa6cc4fac96 100644 --- a/arch/powerpc/include/asm/byteorder.h +++ b/arch/powerpc/include/asm/byteorder.h @@ -7,8 +7,6 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ - -#include #include #endif /* _ASM_POWERPC_BYTEORDER_H */ diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index f2af4167bd5..63a23415fba 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -13,4 +13,3 @@ unifdef-y += cmb.h unifdef-y += debug.h unifdef-y += chpid.h unifdef-y += schid.h -unifdef-y += swab.h diff --git a/arch/s390/include/asm/byteorder.h b/arch/s390/include/asm/byteorder.h index b95a2b2933f..a332e59e26f 100644 --- a/arch/s390/include/asm/byteorder.h +++ b/arch/s390/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _S390_BYTEORDER_H #define _S390_BYTEORDER_H -#include #include #endif /* _S390_BYTEORDER_H */ diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index f1a2a0d1c79..43910cdf78a 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -6,4 +6,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += posix_types_32.h unifdef-y += posix_types_64.h -unifdef-y += swab.h diff --git a/arch/sh/include/asm/byteorder.h b/arch/sh/include/asm/byteorder.h index e95c41a5c8c..db2f5d7cb17 100644 --- a/arch/sh/include/asm/byteorder.h +++ b/arch/sh/include/asm/byteorder.h @@ -1,8 +1,6 @@ #ifndef 
__ASM_SH_BYTEORDER_H #define __ASM_SH_BYTEORDER_H -#include - #ifdef __LITTLE_ENDIAN__ #include #else diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 95e38a43dff..deeb0fba802 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -17,4 +17,3 @@ header-y += traps.h header-y += uctx.h header-y += utrap.h header-y += watchdog.h -header-y += swab.h diff --git a/arch/sparc/include/asm/byteorder.h b/arch/sparc/include/asm/byteorder.h index 48a047cd6fa..ccc1b6b7de6 100644 --- a/arch/sparc/include/asm/byteorder.h +++ b/arch/sparc/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _SPARC_BYTEORDER_H #define _SPARC_BYTEORDER_H -#include #include #endif /* _SPARC_BYTEORDER_H */ diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index a9f8a814a1f..4a8e80cdcfa 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -22,4 +22,3 @@ unifdef-y += unistd_32.h unifdef-y += unistd_64.h unifdef-y += vm86.h unifdef-y += vsyscall.h -unifdef-y += swab.h diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index 7c49917e3d9..b13a7a88f3e 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_X86_BYTEORDER_H #define _ASM_X86_BYTEORDER_H -#include #include #endif /* _ASM_X86_BYTEORDER_H */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 58c02a45413..c68e1680da0 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,3 +1 @@ include include/asm-generic/Kbuild.asm - -unifdef-y += swab.h diff --git a/arch/xtensa/include/asm/byteorder.h b/arch/xtensa/include/asm/byteorder.h index 329b94591ca..54eb6315349 100644 --- a/arch/xtensa/include/asm/byteorder.h +++ b/arch/xtensa/include/asm/byteorder.h @@ -1,8 +1,6 @@ #ifndef _XTENSA_BYTEORDER_H #define _XTENSA_BYTEORDER_H -#include - #ifdef __XTENSA_EL__ #include #elif defined(__XTENSA_EB__) diff --git 
a/include/asm-frv/Kbuild b/include/asm-frv/Kbuild index 1f44e7c7699..0f8956def73 100644 --- a/include/asm-frv/Kbuild +++ b/include/asm-frv/Kbuild @@ -3,4 +3,3 @@ include include/asm-generic/Kbuild.asm header-y += registers.h unifdef-y += termios.h -unifdef-y += swab.h diff --git a/include/asm-frv/byteorder.h b/include/asm-frv/byteorder.h index 1187e51ecd1..f29b7593e08 100644 --- a/include/asm-frv/byteorder.h +++ b/include/asm-frv/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include #include #endif /* _ASM_BYTEORDER_H */ diff --git a/include/asm-generic/Kbuild.asm b/include/asm-generic/Kbuild.asm index 1870d5e05f1..70d185534b9 100644 --- a/include/asm-generic/Kbuild.asm +++ b/include/asm-generic/Kbuild.asm @@ -31,6 +31,7 @@ unifdef-y += socket.h unifdef-y += sockios.h unifdef-y += stat.h unifdef-y += statfs.h +unifdef-y += swab.h unifdef-y += termbits.h unifdef-y += termios.h unifdef-y += types.h diff --git a/include/asm-m32r/Kbuild b/include/asm-m32r/Kbuild index 27b108a86b3..c68e1680da0 100644 --- a/include/asm-m32r/Kbuild +++ b/include/asm-m32r/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/include/asm-m32r/byteorder.h b/include/asm-m32r/byteorder.h index 61ff9cfd845..21855d8b028 100644 --- a/include/asm-m32r/byteorder.h +++ b/include/asm-m32r/byteorder.h @@ -1,8 +1,6 @@ #ifndef _ASM_M32R_BYTEORDER_H #define _ASM_M32R_BYTEORDER_H -#include - #if defined(__LITTLE_ENDIAN__) # include #else diff --git a/include/asm-m68k/Kbuild b/include/asm-m68k/Kbuild index 52fd96b4142..1a922fad76f 100644 --- a/include/asm-m68k/Kbuild +++ b/include/asm-m68k/Kbuild @@ -1,3 +1,2 @@ include include/asm-generic/Kbuild.asm header-y += cachectl.h -unifdef-y += swab.h diff --git a/include/asm-m68k/byteorder.h b/include/asm-m68k/byteorder.h index 300866523b8..31b260a8880 100644 --- a/include/asm-m68k/byteorder.h +++ b/include/asm-m68k/byteorder.h @@ -1,7 +1,6 @@ #ifndef _M68K_BYTEORDER_H #define 
_M68K_BYTEORDER_H -#include #include #endif /* _M68K_BYTEORDER_H */ diff --git a/include/asm-mn10300/Kbuild b/include/asm-mn10300/Kbuild index 27b108a86b3..c68e1680da0 100644 --- a/include/asm-mn10300/Kbuild +++ b/include/asm-mn10300/Kbuild @@ -1,2 +1 @@ include include/asm-generic/Kbuild.asm -unifdef-y += swab.h diff --git a/include/asm-mn10300/byteorder.h b/include/asm-mn10300/byteorder.h index 45b18ded19e..5dd0bdd9fee 100644 --- a/include/asm-mn10300/byteorder.h +++ b/include/asm-mn10300/byteorder.h @@ -1,7 +1,6 @@ #ifndef _ASM_BYTEORDER_H #define _ASM_BYTEORDER_H -#include #include #endif /* _ASM_BYTEORDER_H */ diff --git a/include/linux/swab.h b/include/linux/swab.h index be5284d4a05..ea0c02fd516 100644 --- a/include/linux/swab.h +++ b/include/linux/swab.h @@ -3,7 +3,7 @@ #include #include -#include +#include /* * casts are necessary for constants, because we never know how for sure -- cgit From f557206800801410c30e53ce7a27219b2c4cf0ba Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:40:03 -0800 Subject: gro: Fix page ref count for skbs freed normally When an skb with page frags is merged into an existing one, we cannibalise its reference count. This is OK when the skb is reused because we set nr_frags to zero in that case. However, for the case where the skb is freed through kfree_skb, we didn't clear nr_frags which causes the page to be freed prematurely. This is fixed by moving the skb resetting into skb_gro_receive. Reported-by: Jeff Kirsher Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/core/dev.c | 6 ------ net/core/skbuff.c | 6 ++++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7dec715293b..60377b6c0a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2491,12 +2491,6 @@ EXPORT_SYMBOL(napi_gro_receive); void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) { - skb_shinfo(skb)->nr_frags = 0; - - skb->len -= skb->data_len; - skb->truesize -= skb->data_len; - skb->data_len = 0; - __skb_pull(skb, skb_headlen(skb)); skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5110b359c75..65eac773903 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2602,6 +2602,12 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); skb_shinfo(p)->nr_frags += skb_shinfo(skb)->nr_frags; + skb_shinfo(skb)->nr_frags = 0; + + skb->truesize -= skb->data_len; + skb->len -= skb->data_len; + skb->data_len = 0; + NAPI_GRO_CB(skb)->free = 1; goto done; } -- cgit From 4e704ee3c2cd38748ca59d835435d6a7e7f6f613 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 14 Jan 2009 20:41:12 -0800 Subject: gso: Ensure that the packet is long enough When we get a GSO packet from an untrusted source, we need to ensure that it is sufficiently long so that we don't end up crashing. Based on discovery and patch by Ian Campbell. Signed-off-by: Herbert Xu Tested-by: Ian Campbell Signed-off-by: David S. 
Miller --- net/ipv4/tcp.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48ada1b2d2c..0cd71b84e48 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2389,7 +2389,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) unsigned int seq; __be32 delta; unsigned int oldlen; - unsigned int len; + unsigned int mss; if (!pskb_may_pull(skb, sizeof(*th))) goto out; @@ -2405,10 +2405,13 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) oldlen = (u16)~skb->len; __skb_pull(skb, thlen); + mss = skb_shinfo(skb)->gso_size; + if (unlikely(skb->len <= mss)) + goto out; + if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ int type = skb_shinfo(skb)->gso_type; - int mss; if (unlikely(type & ~(SKB_GSO_TCPV4 | @@ -2419,7 +2422,6 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) !(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))) goto out; - mss = skb_shinfo(skb)->gso_size; skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); segs = NULL; @@ -2430,8 +2432,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) if (IS_ERR(segs)) goto out; - len = skb_shinfo(skb)->gso_size; - delta = htonl(oldlen + (thlen + len)); + delta = htonl(oldlen + (thlen + mss)); skb = segs; th = tcp_hdr(skb); @@ -2447,7 +2448,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) csum_fold(csum_partial(skb_transport_header(skb), thlen, skb->csum)); - seq += len; + seq += mss; skb = skb->next; th = tcp_hdr(skb); -- cgit From d7e094d4212bc72f5575e54edfef1349e0c4cdb5 Mon Sep 17 00:00:00 2001 From: Mike Ditto Date: Wed, 14 Jan 2009 20:43:43 -0800 Subject: powerpc/fs_enet: Add missing irq free in error path. If something goes wrong attaching to phy driver, we weren't freeing the IRQ. Signed-off-by: Mike Ditto Signed-off-by: David S. 
Miller --- drivers/net/fs_enet/fs_enet-main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 4e6a9195fe5..ce900e54d8d 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -795,6 +795,7 @@ static int fs_enet_open(struct net_device *dev) err = fs_init_phy(dev); if (err) { + free_irq(fep->interrupt, dev); if (fep->fpi->use_napi) napi_disable(&fep->napi); return err; -- cgit From d1d5e6b1cead3df6f722d1d458874bd7f93da8d6 Mon Sep 17 00:00:00 2001 From: Daniele Venzano Date: Wed, 14 Jan 2009 20:46:24 -0800 Subject: sis900: generate fake MAC address if the hardware doesn't have one The attached patch modifies the sis900 driver when the MAC address read from the hardware is invalid. As suggested, the patch now generates a random address so that the user can go on and use the hardware. In any case a message is also shown to warn on the unexpected condition. This seems to happen with newer HW implementation of the sis900 chipset, since this never came up before. Patch is against vanilla 2.6.28 (but the driver doesn't change so often, so it will probably apply to older/newer versions too). See bugzilla ID 10201 and 11649 and ignore the previous patch. Signed-off-by: Daniele Venzano Signed-off-by: David S. 
Miller --- drivers/net/sis900.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 6cbefcae9ac..be4465bc0a6 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -509,10 +509,10 @@ static int __devinit sis900_probe(struct pci_dev *pci_dev, else ret = sis900_get_mac_addr(pci_dev, net_dev); - if (ret == 0) { - printk(KERN_WARNING "%s: Cannot read MAC address.\n", dev_name); - ret = -ENODEV; - goto err_unmap_rx; + if (!ret || !is_valid_ether_addr(net_dev->dev_addr)) { + random_ether_addr(net_dev->dev_addr); + printk(KERN_WARNING "%s: Unreadable or invalid MAC address," + "using random generated one\n", dev_name); } /* 630ET : set the mii access mode as software-mode */ -- cgit From 2edbb454428729f450f7a0aabbf95ac62b46b78a Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:47:30 -0800 Subject: netxen: fix endianness in firmware commands o Set restricted (little endian) data types in firmware command requests and responses. o Remove unnecessary conversion to LE when writing registers. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic.h | 98 ++++++++++++++++++------------------ drivers/net/netxen/netxen_nic_ctx.c | 50 ++++++++---------- drivers/net/netxen/netxen_nic_hw.c | 42 +++++++++------- drivers/net/netxen/netxen_nic_init.c | 2 +- 4 files changed, 95 insertions(+), 97 deletions(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index f8e601c51da..31311cc66d1 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -995,31 +995,31 @@ struct netxen_recv_context { */ typedef struct { - u64 host_phys_addr; /* Ring base addr */ - u32 ring_size; /* Ring entries */ - u16 msi_index; - u16 rsvd; /* Padding */ + __le64 host_phys_addr; /* Ring base addr */ + __le32 ring_size; /* Ring entries */ + __le16 msi_index; + __le16 rsvd; /* Padding */ } nx_hostrq_sds_ring_t; typedef struct { - u64 host_phys_addr; /* Ring base addr */ - u64 buff_size; /* Packet buffer size */ - u32 ring_size; /* Ring entries */ - u32 ring_kind; /* Class of ring */ + __le64 host_phys_addr; /* Ring base addr */ + __le64 buff_size; /* Packet buffer size */ + __le32 ring_size; /* Ring entries */ + __le32 ring_kind; /* Class of ring */ } nx_hostrq_rds_ring_t; typedef struct { - u64 host_rsp_dma_addr; /* Response dma'd here */ - u32 capabilities[4]; /* Flag bit vector */ - u32 host_int_crb_mode; /* Interrupt crb usage */ - u32 host_rds_crb_mode; /* RDS crb usage */ + __le64 host_rsp_dma_addr; /* Response dma'd here */ + __le32 capabilities[4]; /* Flag bit vector */ + __le32 host_int_crb_mode; /* Interrupt crb usage */ + __le32 host_rds_crb_mode; /* RDS crb usage */ /* These ring offsets are relative to data[0] below */ - u32 rds_ring_offset; /* Offset to RDS config */ - u32 sds_ring_offset; /* Offset to SDS config */ - u16 num_rds_rings; /* Count of RDS rings */ - u16 num_sds_rings; /* Count of SDS rings */ - u16 rsvd1; /* Padding */ - u16 rsvd2; /* Padding */ + __le32 rds_ring_offset; /* Offset to RDS config */ + __le32 
sds_ring_offset; /* Offset to SDS config */ + __le16 num_rds_rings; /* Count of RDS rings */ + __le16 num_sds_rings; /* Count of SDS rings */ + __le16 rsvd1; /* Padding */ + __le16 rsvd2; /* Padding */ u8 reserved[128]; /* reserve space for future expansion*/ /* MUST BE 64-bit aligned. The following is packed: @@ -1029,24 +1029,24 @@ typedef struct { } nx_hostrq_rx_ctx_t; typedef struct { - u32 host_producer_crb; /* Crb to use */ - u32 rsvd1; /* Padding */ + __le32 host_producer_crb; /* Crb to use */ + __le32 rsvd1; /* Padding */ } nx_cardrsp_rds_ring_t; typedef struct { - u32 host_consumer_crb; /* Crb to use */ - u32 interrupt_crb; /* Crb to use */ + __le32 host_consumer_crb; /* Crb to use */ + __le32 interrupt_crb; /* Crb to use */ } nx_cardrsp_sds_ring_t; typedef struct { /* These ring offsets are relative to data[0] below */ - u32 rds_ring_offset; /* Offset to RDS config */ - u32 sds_ring_offset; /* Offset to SDS config */ - u32 host_ctx_state; /* Starting State */ - u32 num_fn_per_port; /* How many PCI fn share the port */ - u16 num_rds_rings; /* Count of RDS rings */ - u16 num_sds_rings; /* Count of SDS rings */ - u16 context_id; /* Handle for context */ + __le32 rds_ring_offset; /* Offset to RDS config */ + __le32 sds_ring_offset; /* Offset to SDS config */ + __le32 host_ctx_state; /* Starting State */ + __le32 num_fn_per_port; /* How many PCI fn share the port */ + __le16 num_rds_rings; /* Count of RDS rings */ + __le16 num_sds_rings; /* Count of SDS rings */ + __le16 context_id; /* Handle for context */ u8 phys_port; /* Physical id of port */ u8 virt_port; /* Virtual/Logical id of port */ u8 reserved[128]; /* save space for future expansion */ @@ -1072,34 +1072,34 @@ typedef struct { */ typedef struct { - u64 host_phys_addr; /* Ring base addr */ - u32 ring_size; /* Ring entries */ - u32 rsvd; /* Padding */ + __le64 host_phys_addr; /* Ring base addr */ + __le32 ring_size; /* Ring entries */ + __le32 rsvd; /* Padding */ } nx_hostrq_cds_ring_t; typedef struct 
{ - u64 host_rsp_dma_addr; /* Response dma'd here */ - u64 cmd_cons_dma_addr; /* */ - u64 dummy_dma_addr; /* */ - u32 capabilities[4]; /* Flag bit vector */ - u32 host_int_crb_mode; /* Interrupt crb usage */ - u32 rsvd1; /* Padding */ - u16 rsvd2; /* Padding */ - u16 interrupt_ctl; - u16 msi_index; - u16 rsvd3; /* Padding */ + __le64 host_rsp_dma_addr; /* Response dma'd here */ + __le64 cmd_cons_dma_addr; /* */ + __le64 dummy_dma_addr; /* */ + __le32 capabilities[4]; /* Flag bit vector */ + __le32 host_int_crb_mode; /* Interrupt crb usage */ + __le32 rsvd1; /* Padding */ + __le16 rsvd2; /* Padding */ + __le16 interrupt_ctl; + __le16 msi_index; + __le16 rsvd3; /* Padding */ nx_hostrq_cds_ring_t cds_ring; /* Desc of cds ring */ u8 reserved[128]; /* future expansion */ } nx_hostrq_tx_ctx_t; typedef struct { - u32 host_producer_crb; /* Crb to use */ - u32 interrupt_crb; /* Crb to use */ + __le32 host_producer_crb; /* Crb to use */ + __le32 interrupt_crb; /* Crb to use */ } nx_cardrsp_cds_ring_t; typedef struct { - u32 host_ctx_state; /* Starting state */ - u16 context_id; /* Handle for context */ + __le32 host_ctx_state; /* Starting state */ + __le16 context_id; /* Handle for context */ u8 phys_port; /* Physical id of port */ u8 virt_port; /* Virtual/Logical id of port */ nx_cardrsp_cds_ring_t cds_ring; /* Card cds settings */ @@ -1202,9 +1202,9 @@ enum { #define VPORT_MISS_MODE_ACCEPT_MULTI 2 /* accept unmatched multicast */ typedef struct { - u64 qhdr; - u64 req_hdr; - u64 words[6]; + __le64 qhdr; + __le64 req_hdr; + __le64 words[6]; } nx_nic_req_t; typedef struct { diff --git a/drivers/net/netxen/netxen_nic_ctx.c b/drivers/net/netxen/netxen_nic_ctx.c index 64b51643c62..746bdb47041 100644 --- a/drivers/net/netxen/netxen_nic_ctx.c +++ b/drivers/net/netxen/netxen_nic_ctx.c @@ -76,7 +76,7 @@ netxen_api_unlock(struct netxen_adapter *adapter) static u32 netxen_poll_rsp(struct netxen_adapter *adapter) { - u32 raw_rsp, rsp = NX_CDRP_RSP_OK; + u32 rsp = NX_CDRP_RSP_OK; int 
timeout = 0; do { @@ -86,10 +86,7 @@ netxen_poll_rsp(struct netxen_adapter *adapter) if (++timeout > NX_OS_CRB_RETRY_COUNT) return NX_CDRP_RSP_TIMEOUT; - netxen_nic_read_w1(adapter, NX_CDRP_CRB_OFFSET, - &raw_rsp); - - rsp = le32_to_cpu(raw_rsp); + netxen_nic_read_w1(adapter, NX_CDRP_CRB_OFFSET, &rsp); } while (!NX_CDRP_IS_RSP(rsp)); return rsp; @@ -109,20 +106,16 @@ netxen_issue_cmd(struct netxen_adapter *adapter, if (netxen_api_lock(adapter)) return NX_RCODE_TIMEOUT; - netxen_nic_write_w1(adapter, NX_SIGN_CRB_OFFSET, - cpu_to_le32(signature)); + netxen_nic_write_w1(adapter, NX_SIGN_CRB_OFFSET, signature); - netxen_nic_write_w1(adapter, NX_ARG1_CRB_OFFSET, - cpu_to_le32(arg1)); + netxen_nic_write_w1(adapter, NX_ARG1_CRB_OFFSET, arg1); - netxen_nic_write_w1(adapter, NX_ARG2_CRB_OFFSET, - cpu_to_le32(arg2)); + netxen_nic_write_w1(adapter, NX_ARG2_CRB_OFFSET, arg2); - netxen_nic_write_w1(adapter, NX_ARG3_CRB_OFFSET, - cpu_to_le32(arg3)); + netxen_nic_write_w1(adapter, NX_ARG3_CRB_OFFSET, arg3); netxen_nic_write_w1(adapter, NX_CDRP_CRB_OFFSET, - cpu_to_le32(NX_CDRP_FORM_CMD(cmd))); + NX_CDRP_FORM_CMD(cmd)); rsp = netxen_poll_rsp(adapter); @@ -133,7 +126,6 @@ netxen_issue_cmd(struct netxen_adapter *adapter, rcode = NX_RCODE_TIMEOUT; } else if (rsp == NX_CDRP_RSP_FAIL) { netxen_nic_read_w1(adapter, NX_ARG1_CRB_OFFSET, &rcode); - rcode = le32_to_cpu(rcode); printk(KERN_ERR "%s: failed card response code:0x%x\n", netxen_nic_driver_name, rcode); @@ -183,7 +175,7 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) int i, nrds_rings, nsds_rings; size_t rq_size, rsp_size; - u32 cap, reg; + u32 cap, reg, val; int err; @@ -225,11 +217,14 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) prq->num_rds_rings = cpu_to_le16(nrds_rings); prq->num_sds_rings = cpu_to_le16(nsds_rings); - prq->rds_ring_offset = 0; - prq->sds_ring_offset = prq->rds_ring_offset + + prq->rds_ring_offset = cpu_to_le32(0); + + val = le32_to_cpu(prq->rds_ring_offset) + 
(sizeof(nx_hostrq_rds_ring_t) * nrds_rings); + prq->sds_ring_offset = cpu_to_le32(val); - prq_rds = (nx_hostrq_rds_ring_t *)(prq->data + prq->rds_ring_offset); + prq_rds = (nx_hostrq_rds_ring_t *)(prq->data + + le32_to_cpu(prq->rds_ring_offset)); for (i = 0; i < nrds_rings; i++) { @@ -241,17 +236,14 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) prq_rds[i].buff_size = cpu_to_le64(rds_ring->dma_size); } - prq_sds = (nx_hostrq_sds_ring_t *)(prq->data + prq->sds_ring_offset); + prq_sds = (nx_hostrq_sds_ring_t *)(prq->data + + le32_to_cpu(prq->sds_ring_offset)); prq_sds[0].host_phys_addr = cpu_to_le64(recv_ctx->rcv_status_desc_phys_addr); prq_sds[0].ring_size = cpu_to_le32(adapter->max_rx_desc_count); /* only one msix vector for now */ - prq_sds[0].msi_index = cpu_to_le32(0); - - /* now byteswap offsets */ - prq->rds_ring_offset = cpu_to_le32(prq->rds_ring_offset); - prq->sds_ring_offset = cpu_to_le32(prq->sds_ring_offset); + prq_sds[0].msi_index = cpu_to_le16(0); phys_addr = hostrq_phys_addr; err = netxen_issue_cmd(adapter, @@ -269,9 +261,9 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) prsp_rds = ((nx_cardrsp_rds_ring_t *) - &prsp->data[prsp->rds_ring_offset]); + &prsp->data[le32_to_cpu(prsp->rds_ring_offset)]); - for (i = 0; i < le32_to_cpu(prsp->num_rds_rings); i++) { + for (i = 0; i < le16_to_cpu(prsp->num_rds_rings); i++) { rds_ring = &recv_ctx->rds_rings[i]; reg = le32_to_cpu(prsp_rds[i].host_producer_crb); @@ -279,7 +271,7 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) } prsp_sds = ((nx_cardrsp_sds_ring_t *) - &prsp->data[prsp->sds_ring_offset]); + &prsp->data[le32_to_cpu(prsp->sds_ring_offset)]); reg = le32_to_cpu(prsp_sds[0].host_consumer_crb); recv_ctx->crb_sts_consumer = NETXEN_NIC_REG(reg - 0x200); @@ -288,7 +280,7 @@ nx_fw_cmd_create_rx_ctx(struct netxen_adapter *adapter) recv_ctx->state = le32_to_cpu(prsp->host_ctx_state); recv_ctx->context_id = le16_to_cpu(prsp->context_id); - recv_ctx->virt_port = 
le16_to_cpu(prsp->virt_port); + recv_ctx->virt_port = prsp->virt_port; out_free_rsp: pci_free_consistent(adapter->pdev, rsp_size, prsp, cardrsp_phys_addr); diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index aa6e603bfcb..e8a0eed0078 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -539,16 +539,19 @@ static int nx_p3_sre_macaddr_change(struct net_device *dev, { struct netxen_adapter *adapter = netdev_priv(dev); nx_nic_req_t req; - nx_mac_req_t mac_req; + nx_mac_req_t *mac_req; + u64 word; int rv; memset(&req, 0, sizeof(nx_nic_req_t)); - req.qhdr |= (NX_NIC_REQUEST << 23); - req.req_hdr |= NX_MAC_EVENT; - req.req_hdr |= ((u64)adapter->portnum << 16); - mac_req.op = op; - memcpy(&mac_req.mac_addr, addr, 6); - req.words[0] = cpu_to_le64(*(u64 *)&mac_req); + req.qhdr = cpu_to_le64(NX_NIC_REQUEST << 23); + + word = NX_MAC_EVENT | ((u64)adapter->portnum << 16); + req.req_hdr = cpu_to_le64(word); + + mac_req = (nx_mac_req_t *)&req.words[0]; + mac_req->op = op; + memcpy(mac_req->mac_addr, addr, 6); rv = netxen_send_cmd_descs(adapter, (struct cmd_desc_type0 *)&req, 1); if (rv != 0) { @@ -612,12 +615,16 @@ send_fw_cmd: int netxen_p3_nic_set_promisc(struct netxen_adapter *adapter, u32 mode) { nx_nic_req_t req; + u64 word; memset(&req, 0, sizeof(nx_nic_req_t)); - req.qhdr |= (NX_HOST_REQUEST << 23); - req.req_hdr |= NX_NIC_H2C_OPCODE_PROXY_SET_VPORT_MISS_MODE; - req.req_hdr |= ((u64)adapter->portnum << 16); + req.qhdr = cpu_to_le64(NX_HOST_REQUEST << 23); + + word = NX_NIC_H2C_OPCODE_PROXY_SET_VPORT_MISS_MODE | + ((u64)adapter->portnum << 16); + req.req_hdr = cpu_to_le64(word); + req.words[0] = cpu_to_le64(mode); return netxen_send_cmd_descs(adapter, @@ -632,13 +639,15 @@ int netxen_p3_nic_set_promisc(struct netxen_adapter *adapter, u32 mode) int netxen_config_intr_coalesce(struct netxen_adapter *adapter) { nx_nic_req_t req; + u64 word; int rv; memset(&req, 0, sizeof(nx_nic_req_t)); - req.qhdr |= 
(NX_NIC_REQUEST << 23); - req.req_hdr |= NETXEN_CONFIG_INTR_COALESCE; - req.req_hdr |= ((u64)adapter->portnum << 16); + req.qhdr = cpu_to_le64(NX_NIC_REQUEST << 23); + + word = NETXEN_CONFIG_INTR_COALESCE | ((u64)adapter->portnum << 16); + req.req_hdr = cpu_to_le64(word); memcpy(&req.words[0], &adapter->coal, sizeof(adapter->coal)); @@ -772,13 +781,10 @@ int netxen_p3_get_mac_addr(struct netxen_adapter *adapter, __le64 *mac) adapter->hw_read_wx(adapter, crbaddr, &mac_lo, 4); adapter->hw_read_wx(adapter, crbaddr+4, &mac_hi, 4); - mac_hi = cpu_to_le32(mac_hi); - mac_lo = cpu_to_le32(mac_lo); - if (pci_func & 1) - *mac = ((mac_lo >> 16) | ((u64)mac_hi << 16)); + *mac = le64_to_cpu((mac_lo >> 16) | ((u64)mac_hi << 16)); else - *mac = ((mac_lo) | ((u64)mac_hi << 32)); + *mac = le64_to_cpu((u64)mac_lo | ((u64)mac_hi << 32)); return 0; } diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index d924468e506..c0e06a65317 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -1277,7 +1277,7 @@ static void netxen_process_rcv(struct netxen_adapter *adapter, int ctxid, dev_kfree_skb_any(skb); for (i = 0; i < nr_frags; i++) { - index = frag_desc->frag_handles[i]; + index = le16_to_cpu(frag_desc->frag_handles[i]); skb = netxen_process_rxbuf(adapter, rds_ring, index, cksum); if (skb) -- cgit From 391587c3447d99b842a647f8e701895c9eea050b Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:48:11 -0800 Subject: netxen: fix ipv6 offload and tx cleanup o fix the ip/tcp hdr offset in tx descriptors for ipv6. o cleanup xmit function, move the tso checks into separate function, this reduces unnecessary endian conversions back and forth. o optimize macros to initialize tx descriptors. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic.h | 43 +++++---------- drivers/net/netxen/netxen_nic_hw.c | 4 -- drivers/net/netxen/netxen_nic_main.c | 101 ++++++++++++++++------------------- 3 files changed, 57 insertions(+), 91 deletions(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index 31311cc66d1..acb2ac971ca 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -308,27 +308,16 @@ struct netxen_ring_ctx { #define netxen_set_cmd_desc_ctxid(cmd_desc, var) \ ((cmd_desc)->port_ctxid |= ((var) << 4 & 0xF0)) -#define netxen_set_cmd_desc_flags(cmd_desc, val) \ - (cmd_desc)->flags_opcode = ((cmd_desc)->flags_opcode & \ - ~cpu_to_le16(0x7f)) | cpu_to_le16((val) & 0x7f) -#define netxen_set_cmd_desc_opcode(cmd_desc, val) \ - (cmd_desc)->flags_opcode = ((cmd_desc)->flags_opcode & \ - ~cpu_to_le16((u16)0x3f << 7)) | cpu_to_le16(((val) & 0x3f) << 7) - -#define netxen_set_cmd_desc_num_of_buff(cmd_desc, val) \ - (cmd_desc)->num_of_buffers_total_length = \ - ((cmd_desc)->num_of_buffers_total_length & \ - ~cpu_to_le32(0xff)) | cpu_to_le32((val) & 0xff) -#define netxen_set_cmd_desc_totallength(cmd_desc, val) \ - (cmd_desc)->num_of_buffers_total_length = \ - ((cmd_desc)->num_of_buffers_total_length & \ - ~cpu_to_le32((u32)0xffffff << 8)) | \ - cpu_to_le32(((val) & 0xffffff) << 8) - -#define netxen_get_cmd_desc_opcode(cmd_desc) \ - ((le16_to_cpu((cmd_desc)->flags_opcode) >> 7) & 0x003f) -#define netxen_get_cmd_desc_totallength(cmd_desc) \ - ((le32_to_cpu((cmd_desc)->num_of_buffers_total_length) >> 8) & 0xffffff) +#define netxen_set_tx_port(_desc, _port) \ + (_desc)->port_ctxid = ((_port) & 0xf) | (((_port) << 4) & 0xf0) + +#define netxen_set_tx_flags_opcode(_desc, _flags, _opcode) \ + (_desc)->flags_opcode = \ + cpu_to_le16(((_flags) & 0x7f) | (((_opcode) & 0x3f) << 7)) + +#define netxen_set_tx_frags_len(_desc, _frags, _len) \ + (_desc)->num_of_buffers_total_length = \ + cpu_to_le32(((_frags) & 0xff) | (((_len) & 
0xffffff) << 8)) struct cmd_desc_type0 { u8 tcp_hdr_offset; /* For LSO only */ @@ -757,7 +746,7 @@ extern char netxen_nic_driver_name[]; */ struct netxen_skb_frag { u64 dma; - u32 length; + ulong length; }; #define _netxen_set_bits(config_word, start, bits, val) {\ @@ -783,13 +772,7 @@ struct netxen_skb_frag { struct netxen_cmd_buffer { struct sk_buff *skb; struct netxen_skb_frag frag_array[MAX_BUFFERS_PER_CMD + 1]; - u32 total_length; - u32 mss; - u16 port; - u8 cmd; - u8 frag_count; - unsigned long time_stamp; - u32 state; + u32 frag_count; }; /* In rx_buffer, we do not need multiple fragments as is a single buffer */ @@ -1486,8 +1469,6 @@ void netxen_release_tx_buffers(struct netxen_adapter *adapter); void netxen_initialize_adapter_ops(struct netxen_adapter *adapter); int netxen_init_firmware(struct netxen_adapter *adapter); -void netxen_tso_check(struct netxen_adapter *adapter, - struct cmd_desc_type0 *desc, struct sk_buff *skb); void netxen_nic_clear_stats(struct netxen_adapter *adapter); void netxen_watchdog_task(struct work_struct *work); void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index e8a0eed0078..98d0bcda5f4 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -508,12 +508,8 @@ netxen_send_cmd_descs(struct netxen_adapter *adapter, cmd_desc = &cmd_desc_arr[i]; pbuf = &adapter->cmd_buf_arr[producer]; - pbuf->mss = 0; - pbuf->total_length = 0; pbuf->skb = NULL; - pbuf->cmd = 0; pbuf->frag_count = 0; - pbuf->port = 0; /* adapter->ahw.cmd_desc_head[producer] = *cmd_desc; */ memcpy(&adapter->ahw.cmd_desc_head[producer], diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index ba01524b553..cb391238180 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -39,6 +39,7 @@ #include "netxen_nic_phan_reg.h" #include +#include #include 
MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver"); @@ -1137,29 +1138,46 @@ static int netxen_nic_close(struct net_device *netdev) return 0; } -void netxen_tso_check(struct netxen_adapter *adapter, +static bool netxen_tso_check(struct net_device *netdev, struct cmd_desc_type0 *desc, struct sk_buff *skb) { - if (desc->mss) { - desc->total_hdr_length = (sizeof(struct ethhdr) + - ip_hdrlen(skb) + tcp_hdrlen(skb)); + bool tso = false; + u8 opcode = TX_ETHER_PKT; - if ((NX_IS_REVISION_P3(adapter->ahw.revision_id)) && - (skb->protocol == htons(ETH_P_IPV6))) - netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO6); - else - netxen_set_cmd_desc_opcode(desc, TX_TCP_LSO); + if ((netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && + skb_shinfo(skb)->gso_size > 0) { + + desc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); + desc->total_hdr_length = + skb_transport_offset(skb) + tcp_hdrlen(skb); + + opcode = (skb->protocol == htons(ETH_P_IPV6)) ? + TX_TCP_LSO6 : TX_TCP_LSO; + tso = true; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - netxen_set_cmd_desc_opcode(desc, TX_TCP_PKT); - else if (ip_hdr(skb)->protocol == IPPROTO_UDP) - netxen_set_cmd_desc_opcode(desc, TX_UDP_PKT); - else - return; + u8 l4proto; + + if (skb->protocol == htons(ETH_P_IP)) { + l4proto = ip_hdr(skb)->protocol; + + if (l4proto == IPPROTO_TCP) + opcode = TX_TCP_PKT; + else if(l4proto == IPPROTO_UDP) + opcode = TX_UDP_PKT; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + l4proto = ipv6_hdr(skb)->nexthdr; + + if (l4proto == IPPROTO_TCP) + opcode = TX_TCPV6_PKT; + else if(l4proto == IPPROTO_UDP) + opcode = TX_UDPV6_PKT; + } } desc->tcp_hdr_offset = skb_transport_offset(skb); desc->ip_hdr_offset = skb_network_offset(skb); + netxen_set_tx_flags_opcode(desc, 0, opcode); + return tso; } static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) @@ -1167,33 +1185,20 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct 
net_device *netdev) struct netxen_adapter *adapter = netdev_priv(netdev); struct netxen_hardware_context *hw = &adapter->ahw; unsigned int first_seg_len = skb->len - skb->data_len; + struct netxen_cmd_buffer *pbuf; struct netxen_skb_frag *buffrag; - unsigned int i; + struct cmd_desc_type0 *hwdesc; + int i, k; u32 producer, consumer; - u32 saved_producer = 0; - struct cmd_desc_type0 *hwdesc; - int k; - struct netxen_cmd_buffer *pbuf = NULL; - int frag_count; - int no_of_desc; + int frag_count, no_of_desc; u32 num_txd = adapter->max_tx_desc_count; + bool is_tso = false; frag_count = skb_shinfo(skb)->nr_frags + 1; /* There 4 fragments per descriptor */ no_of_desc = (frag_count + 3) >> 2; - if (netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) { - if (skb_shinfo(skb)->gso_size > 0) { - - no_of_desc++; - if ((ip_hdrlen(skb) + tcp_hdrlen(skb) + - sizeof(struct ethhdr)) > - (sizeof(struct cmd_desc_type0) - 2)) { - no_of_desc++; - } - } - } producer = adapter->cmd_producer; smp_mb(); @@ -1205,34 +1210,22 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } /* Copy the descriptors into the hardware */ - saved_producer = producer; hwdesc = &hw->cmd_desc_head[producer]; memset(hwdesc, 0, sizeof(struct cmd_desc_type0)); /* Take skb->data itself */ pbuf = &adapter->cmd_buf_arr[producer]; - if ((netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) && - skb_shinfo(skb)->gso_size > 0) { - pbuf->mss = skb_shinfo(skb)->gso_size; - hwdesc->mss = cpu_to_le16(skb_shinfo(skb)->gso_size); - } else { - pbuf->mss = 0; - hwdesc->mss = 0; - } - pbuf->total_length = skb->len; + + is_tso = netxen_tso_check(netdev, hwdesc, skb); + pbuf->skb = skb; - pbuf->cmd = TX_ETHER_PKT; pbuf->frag_count = frag_count; - pbuf->port = adapter->portnum; buffrag = &pbuf->frag_array[0]; buffrag->dma = pci_map_single(adapter->pdev, skb->data, first_seg_len, PCI_DMA_TODEVICE); buffrag->length = first_seg_len; - netxen_set_cmd_desc_totallength(hwdesc, skb->len); - 
netxen_set_cmd_desc_num_of_buff(hwdesc, frag_count); - netxen_set_cmd_desc_opcode(hwdesc, TX_ETHER_PKT); + netxen_set_tx_frags_len(hwdesc, frag_count, skb->len); + netxen_set_tx_port(hwdesc, adapter->portnum); - netxen_set_cmd_desc_port(hwdesc, adapter->portnum); - netxen_set_cmd_desc_ctxid(hwdesc, adapter->portnum); hwdesc->buffer1_length = cpu_to_le16(first_seg_len); hwdesc->addr_buffer1 = cpu_to_le64(buffrag->dma); @@ -1285,16 +1278,12 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } producer = get_next_index(producer, num_txd); - /* might change opcode to TX_TCP_LSO */ - netxen_tso_check(adapter, &hw->cmd_desc_head[saved_producer], skb); - /* For LSO, we need to copy the MAC/IP/TCP headers into * the descriptor ring */ - if (netxen_get_cmd_desc_opcode(&hw->cmd_desc_head[saved_producer]) - == TX_TCP_LSO) { + if (is_tso) { int hdr_len, first_hdr_len, more_hdr; - hdr_len = hw->cmd_desc_head[saved_producer].total_hdr_length; + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); if (hdr_len > (sizeof(struct cmd_desc_type0) - 2)) { first_hdr_len = sizeof(struct cmd_desc_type0) - 2; more_hdr = 1; -- cgit From c7860a2aec571ea95d3ad19b8d9775b27828baac Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:48:32 -0800 Subject: netxen: fix link speed reporting for some boards o Read negotiated link speed when link state changes. o Fix link speed reporting for hybrid nic boards, which have both 1Gbps and 10Gbps ports. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic.h | 3 ++- drivers/net/netxen/netxen_nic_ethtool.c | 31 +++++++++++++++++++++++-------- drivers/net/netxen/netxen_nic_hw.c | 28 ++++++++++++++++++++-------- drivers/net/netxen/netxen_nic_main.c | 14 +++++++++++++- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index acb2ac971ca..a674a23f72b 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -499,7 +499,8 @@ typedef enum { NETXEN_BRDTYPE_P3_10G_SFP_CT = 0x002a, NETXEN_BRDTYPE_P3_10G_SFP_QT = 0x002b, NETXEN_BRDTYPE_P3_10G_CX4 = 0x0031, - NETXEN_BRDTYPE_P3_10G_XFP = 0x0032 + NETXEN_BRDTYPE_P3_10G_XFP = 0x0032, + NETXEN_BRDTYPE_P3_10G_TP = 0x0080 } netxen_brdtype_t; diff --git a/drivers/net/netxen/netxen_nic_ethtool.c b/drivers/net/netxen/netxen_nic_ethtool.c index e45ce295172..c0bd40fcf70 100644 --- a/drivers/net/netxen/netxen_nic_ethtool.c +++ b/drivers/net/netxen/netxen_nic_ethtool.c @@ -136,11 +136,9 @@ netxen_nic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) ecmd->port = PORT_TP; - if (netif_running(dev)) { - ecmd->speed = adapter->link_speed; - ecmd->duplex = adapter->link_duplex; - ecmd->autoneg = adapter->link_autoneg; - } + ecmd->speed = adapter->link_speed; + ecmd->duplex = adapter->link_duplex; + ecmd->autoneg = adapter->link_autoneg; } else if (adapter->ahw.board_type == NETXEN_NIC_XGBE) { u32 val; @@ -171,7 +169,7 @@ netxen_nic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) } else return -EIO; - ecmd->phy_address = adapter->portnum; + ecmd->phy_address = adapter->physical_port; ecmd->transceiver = XCVR_EXTERNAL; switch ((netxen_brdtype_t) boardinfo->board_type) { @@ -180,13 +178,13 @@ netxen_nic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) case NETXEN_BRDTYPE_P3_REF_QG: case NETXEN_BRDTYPE_P3_4_GB: case NETXEN_BRDTYPE_P3_4_GB_MM: - case NETXEN_BRDTYPE_P3_10000_BASE_T: ecmd->supported |= 
SUPPORTED_Autoneg; ecmd->advertising |= ADVERTISED_Autoneg; case NETXEN_BRDTYPE_P2_SB31_10G_CX4: case NETXEN_BRDTYPE_P3_10G_CX4: case NETXEN_BRDTYPE_P3_10G_CX4_LP: + case NETXEN_BRDTYPE_P3_10000_BASE_T: ecmd->supported |= SUPPORTED_TP; ecmd->advertising |= ADVERTISED_TP; ecmd->port = PORT_TP; @@ -204,16 +202,33 @@ netxen_nic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) ecmd->port = PORT_FIBRE; ecmd->autoneg = AUTONEG_DISABLE; break; - case NETXEN_BRDTYPE_P2_SB31_10G: case NETXEN_BRDTYPE_P3_10G_SFP_PLUS: case NETXEN_BRDTYPE_P3_10G_SFP_CT: case NETXEN_BRDTYPE_P3_10G_SFP_QT: + ecmd->advertising |= ADVERTISED_TP; + ecmd->supported |= SUPPORTED_TP; + case NETXEN_BRDTYPE_P2_SB31_10G: case NETXEN_BRDTYPE_P3_10G_XFP: ecmd->supported |= SUPPORTED_FIBRE; ecmd->advertising |= ADVERTISED_FIBRE; ecmd->port = PORT_FIBRE; ecmd->autoneg = AUTONEG_DISABLE; break; + case NETXEN_BRDTYPE_P3_10G_TP: + if (adapter->ahw.board_type == NETXEN_NIC_XGBE) { + ecmd->autoneg = AUTONEG_DISABLE; + ecmd->supported |= (SUPPORTED_FIBRE | SUPPORTED_TP); + ecmd->advertising |= + (ADVERTISED_FIBRE | ADVERTISED_TP); + ecmd->port = PORT_FIBRE; + } else { + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->supported |= (SUPPORTED_TP |SUPPORTED_Autoneg); + ecmd->advertising |= + (ADVERTISED_TP | ADVERTISED_Autoneg); + ecmd->port = PORT_TP; + } + break; default: printk(KERN_ERR "netxen-nic: Unsupported board model %d\n", (netxen_brdtype_t) boardinfo->board_type); diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index 98d0bcda5f4..4276f7f8223 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -2036,7 +2036,13 @@ int netxen_nic_get_board_info(struct netxen_adapter *adapter) rv = -1; } - DPRINTK(INFO, "Discovered board type:0x%x ", boardinfo->board_type); + if (boardinfo->board_type == NETXEN_BRDTYPE_P3_4_GB_MM) { + u32 gpio = netxen_nic_reg_read(adapter, + NETXEN_ROMUSB_GLB_PAD_GPIO_I); + if ((gpio & 0x8000) == 0) + 
boardinfo->board_type = NETXEN_BRDTYPE_P3_10G_TP; + } + switch ((netxen_brdtype_t) boardinfo->board_type) { case NETXEN_BRDTYPE_P2_SB35_4G: adapter->ahw.board_type = NETXEN_NIC_GBE; @@ -2055,7 +2061,6 @@ int netxen_nic_get_board_info(struct netxen_adapter *adapter) case NETXEN_BRDTYPE_P3_10G_SFP_QT: case NETXEN_BRDTYPE_P3_10G_XFP: case NETXEN_BRDTYPE_P3_10000_BASE_T: - adapter->ahw.board_type = NETXEN_NIC_XGBE; break; case NETXEN_BRDTYPE_P1_BD: @@ -2065,9 +2070,12 @@ int netxen_nic_get_board_info(struct netxen_adapter *adapter) case NETXEN_BRDTYPE_P3_REF_QG: case NETXEN_BRDTYPE_P3_4_GB: case NETXEN_BRDTYPE_P3_4_GB_MM: - adapter->ahw.board_type = NETXEN_NIC_GBE; break; + case NETXEN_BRDTYPE_P3_10G_TP: + adapter->ahw.board_type = (adapter->portnum < 2) ? + NETXEN_NIC_XGBE : NETXEN_NIC_GBE; + break; default: printk("%s: Unknown(%x)\n", netxen_nic_driver_name, boardinfo->board_type); @@ -2112,12 +2120,16 @@ void netxen_nic_set_link_parameters(struct netxen_adapter *adapter) { __u32 status; __u32 autoneg; - __u32 mode; __u32 port_mode; - netxen_nic_read_w0(adapter, NETXEN_NIU_MODE, &mode); - if (netxen_get_niu_enable_ge(mode)) { /* Gb 10/100/1000 Mbps mode */ + if (!netif_carrier_ok(adapter->netdev)) { + adapter->link_speed = 0; + adapter->link_duplex = -1; + adapter->link_autoneg = AUTONEG_ENABLE; + return; + } + if (adapter->ahw.board_type == NETXEN_NIC_GBE) { adapter->hw_read_wx(adapter, NETXEN_PORT_MODE_ADDR, &port_mode, 4); if (port_mode == NETXEN_PORT_MODE_802_3_AP) { @@ -2143,7 +2155,7 @@ void netxen_nic_set_link_parameters(struct netxen_adapter *adapter) adapter->link_speed = SPEED_1000; break; default: - adapter->link_speed = -1; + adapter->link_speed = 0; break; } switch (netxen_get_phy_duplex(status)) { @@ -2166,7 +2178,7 @@ void netxen_nic_set_link_parameters(struct netxen_adapter *adapter) goto link_down; } else { link_down: - adapter->link_speed = -1; + adapter->link_speed = 0; adapter->link_duplex = -1; } } diff --git 
a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index cb391238180..2c6ce6ffde0 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -243,7 +243,7 @@ static void netxen_check_options(struct netxen_adapter *adapter) case NETXEN_BRDTYPE_P3_4_GB: case NETXEN_BRDTYPE_P3_4_GB_MM: adapter->msix_supported = !!use_msi_x; - adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_10G; + adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_1G; break; case NETXEN_BRDTYPE_P2_SB35_4G: @@ -252,6 +252,14 @@ static void netxen_check_options(struct netxen_adapter *adapter) adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_1G; break; + case NETXEN_BRDTYPE_P3_10G_TP: + adapter->msix_supported = !!use_msi_x; + if (adapter->ahw.board_type == NETXEN_NIC_XGBE) + adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_10G; + else + adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_1G; + break; + default: adapter->msix_supported = 0; adapter->max_rx_desc_count = MAX_RCV_DESCRIPTORS_1G; @@ -1396,6 +1404,8 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netif_carrier_off(netdev); netif_stop_queue(netdev); } + + netxen_nic_set_link_parameters(adapter); } else if (!adapter->ahw.linkup && linkup) { printk(KERN_INFO "%s: %s NIC Link is up\n", netxen_nic_driver_name, netdev->name); @@ -1404,6 +1414,8 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter) netif_carrier_on(netdev); netif_wake_queue(netdev); } + + netxen_nic_set_link_parameters(adapter); } } -- cgit From 27c915a4d843b90eb4065298969578d15e5e6ab0 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:49:00 -0800 Subject: netxen: firmware init fix o Fix order of rom register writes. o Reduce udelays when writing rom registers. This cuts the firmware init time by 40%. o Do not reset core/memory clocks when reinitializing driver. Firmware will handle this when initialized.
Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_hw.c | 6 ++--- drivers/net/netxen/netxen_nic_init.c | 35 ++++++++++++++++----------- drivers/net/netxen/netxen_nic_main.c | 47 +++++++++++++++++++++--------------- 3 files changed, 51 insertions(+), 37 deletions(-) diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index 4276f7f8223..511db2ac57c 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -939,7 +939,7 @@ int netxen_load_firmware(struct netxen_adapter *adapter) { int i; u32 data, size = 0; - u32 flashaddr = NETXEN_BOOTLD_START, memaddr = NETXEN_BOOTLD_START; + u32 flashaddr = NETXEN_BOOTLD_START; size = (NETXEN_IMAGE_START - NETXEN_BOOTLD_START)/4; @@ -951,10 +951,8 @@ int netxen_load_firmware(struct netxen_adapter *adapter) if (netxen_rom_fast_read(adapter, flashaddr, (int *)&data) != 0) return -EIO; - adapter->pci_mem_write(adapter, memaddr, &data, 4); + adapter->pci_mem_write(adapter, flashaddr, &data, 4); flashaddr += 4; - memaddr += 4; - cond_resched(); } msleep(1); diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index c0e06a65317..a3203644b48 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -439,6 +439,8 @@ static int netxen_wait_rom_done(struct netxen_adapter *adapter) long timeout = 0; long done = 0; + cond_resched(); + while (done == 0) { done = netxen_nic_reg_read(adapter, NETXEN_ROMUSB_GLB_STATUS); done &= 2; @@ -533,12 +535,9 @@ static int do_rom_fast_write(struct netxen_adapter *adapter, int addr, static int do_rom_fast_read(struct netxen_adapter *adapter, int addr, int *valp) { - cond_resched(); - netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_ADDRESS, addr); - netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_ABYTE_CNT, 3); - udelay(100); /* prevent bursting on CRB */ netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_DUMMY_BYTE_CNT, 0); + 
netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_ABYTE_CNT, 3); netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_INSTR_OPCODE, 0xb); if (netxen_wait_rom_done(adapter)) { printk("Error waiting for rom done\n"); @@ -546,7 +545,7 @@ static int do_rom_fast_read(struct netxen_adapter *adapter, } /* reset abyte_cnt and dummy_byte_cnt */ netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_ABYTE_CNT, 0); - udelay(100); /* prevent bursting on CRB */ + udelay(10); netxen_nic_reg_write(adapter, NETXEN_ROMUSB_ROM_DUMMY_BYTE_CNT, 0); *valp = netxen_nic_reg_read(adapter, NETXEN_ROMUSB_ROM_RDATA); @@ -884,14 +883,16 @@ int netxen_flash_unlock(struct netxen_adapter *adapter) int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) { int addr, val; - int i, init_delay = 0; + int i, n, init_delay = 0; struct crb_addr_pair *buf; - unsigned offset, n; + unsigned offset; u32 off; /* resetall */ + rom_lock(adapter); netxen_crb_writelit_adapter(adapter, NETXEN_ROMUSB_GLB_SW_RESET, 0xffffffff); + netxen_rom_unlock(adapter); if (verbose) { if (netxen_rom_fast_read(adapter, NETXEN_BOARDTYPE, &val) == 0) @@ -910,7 +911,7 @@ int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) { if (netxen_rom_fast_read(adapter, 0, &n) != 0 || - (n != 0xcafecafeUL) || + (n != 0xcafecafe) || netxen_rom_fast_read(adapter, 4, &n) != 0) { printk(KERN_ERR "%s: ERROR Reading crb_init area: " "n: %08x\n", netxen_nic_driver_name, n); @@ -975,6 +976,14 @@ int netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) /* do not reset PCI */ if (off == (ROMUSB_GLB + 0xbc)) continue; + if (off == (ROMUSB_GLB + 0xa8)) + continue; + if (off == (ROMUSB_GLB + 0xc8)) /* core clock */ + continue; + if (off == (ROMUSB_GLB + 0x24)) /* MN clock */ + continue; + if (off == (ROMUSB_GLB + 0x1c)) /* MS clock */ + continue; if (off == (NETXEN_CRB_PEG_NET_1 + 0x18)) buf[i].data = 0x1020; /* skip the function enable register */ @@ -992,23 +1001,21 @@ int 
netxen_pinit_from_rom(struct netxen_adapter *adapter, int verbose) continue; } + init_delay = 1; /* After writing this register, HW needs time for CRB */ /* to quiet down (else crb_window returns 0xffffffff) */ if (off == NETXEN_ROMUSB_GLB_SW_RESET) { - init_delay = 1; + init_delay = 1000; if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { /* hold xdma in reset also */ buf[i].data = NETXEN_NIC_XDMA_RESET; + buf[i].data = 0x8000ff; } } adapter->hw_write_wx(adapter, off, &buf[i].data, 4); - if (init_delay == 1) { - msleep(1000); - init_delay = 0; - } - msleep(1); + msleep(init_delay); } kfree(buf); diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 2c6ce6ffde0..cbe2b3e814d 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -280,10 +280,15 @@ static void netxen_check_options(struct netxen_adapter *adapter) static int netxen_check_hw_init(struct netxen_adapter *adapter, int first_boot) { - int ret = 0; + u32 val, timeout; if (first_boot == 0x55555555) { /* This is the first boot after power up */ + adapter->pci_write_normalize(adapter, + NETXEN_CAM_RAM(0x1fc), NETXEN_BDINFO_MAGIC); + + if (!NX_IS_REVISION_P2(adapter->ahw.revision_id)) + return 0; /* PCI bus master workaround */ adapter->hw_read_wx(adapter, @@ -303,18 +308,26 @@ netxen_check_hw_init(struct netxen_adapter *adapter, int first_boot) /* clear the register for future unloads/loads */ adapter->pci_write_normalize(adapter, NETXEN_CAM_RAM(0x1fc), 0); - ret = -1; + return -EIO; } - if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { - /* Start P2 boot loader */ - adapter->pci_write_normalize(adapter, - NETXEN_CAM_RAM(0x1fc), NETXEN_BDINFO_MAGIC); - adapter->pci_write_normalize(adapter, - NETXEN_ROMUSB_GLB_PEGTUNE_DONE, 1); - } + /* Start P2 boot loader */ + val = adapter->pci_read_normalize(adapter, + NETXEN_ROMUSB_GLB_PEGTUNE_DONE); + adapter->pci_write_normalize(adapter, + NETXEN_ROMUSB_GLB_PEGTUNE_DONE, val | 0x1); + 
timeout = 0; + do { + msleep(1); + val = adapter->pci_read_normalize(adapter, + NETXEN_CAM_RAM(0x1fc)); + + if (++timeout > 5000) + return -EIO; + + } while (val == NETXEN_BDINFO_MAGIC); } - return ret; + return 0; } static void netxen_set_port_mode(struct netxen_adapter *adapter) @@ -793,8 +806,8 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) CRB_CMDPEG_STATE, 0); netxen_pinit_from_rom(adapter, 0); msleep(1); - netxen_load_firmware(adapter); } + netxen_load_firmware(adapter); if (NX_IS_REVISION_P3(revision_id)) netxen_pcie_strap_init(adapter); @@ -810,13 +823,6 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } - if ((first_boot == 0x55555555) && - (NX_IS_REVISION_P2(revision_id))) { - /* Unlock the HW, prompting the boot sequence */ - adapter->pci_write_normalize(adapter, - NETXEN_ROMUSB_GLB_PEGTUNE_DONE, 1); - } - err = netxen_initialize_adapter_offload(adapter); if (err) goto err_out_iounmap; @@ -830,7 +836,9 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adapter->pci_write_normalize(adapter, CRB_DRIVER_VERSION, i); /* Handshake with the card before we register the devices. */ - netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); + err = netxen_phantom_init(adapter, NETXEN_NIC_PEG_TUNE); + if (err) + goto err_out_free_offload; } /* first_driver */ @@ -934,6 +942,7 @@ err_out_disable_msi: if (adapter->flags & NETXEN_NIC_MSI_ENABLED) pci_disable_msi(pdev); +err_out_free_offload: if (first_driver) netxen_free_adapter_offload(adapter); -- cgit From 06e9d9f9783860fe4c602ef491f47211804ccc96 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:49:22 -0800 Subject: netxen: cleanup mac list on driver unload This fixes a tiny memory leak when driver is unloaded. The mac address list maintained in netxen_adapter needs to be deleted when driver is going down. Signed-off-by: Dhananjay Phadke Signed-off-by: David S.
Miller --- drivers/net/netxen/netxen_nic.h | 1 + drivers/net/netxen/netxen_nic_hw.c | 13 +++++++++++++ drivers/net/netxen/netxen_nic_main.c | 3 +++ 3 files changed, 17 insertions(+) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index a674a23f72b..6598a34b87d 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -1478,6 +1478,7 @@ int netxen_process_cmd_ring(struct netxen_adapter *adapter); u32 netxen_process_rcv_ring(struct netxen_adapter *adapter, int ctx, int max); void netxen_p2_nic_set_multi(struct net_device *netdev); void netxen_p3_nic_set_multi(struct net_device *netdev); +void netxen_p3_free_mac_list(struct netxen_adapter *adapter); int netxen_p3_nic_set_promisc(struct netxen_adapter *adapter, u32); int netxen_config_intr_coalesce(struct netxen_adapter *adapter); diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index 511db2ac57c..e2d2a2fdbe1 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -627,6 +627,19 @@ int netxen_p3_nic_set_promisc(struct netxen_adapter *adapter, u32 mode) (struct cmd_desc_type0 *)&req, 1); } +void netxen_p3_free_mac_list(struct netxen_adapter *adapter) +{ + nx_mac_list_t *cur, *next; + + cur = adapter->mac_list; + + while (cur) { + next = cur->next; + kfree(cur); + cur = next; + } +} + #define NETXEN_CONFIG_INTR_COALESCE 3 /* diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index cbe2b3e814d..9268fd2fbac 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -986,6 +986,9 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev) netxen_free_hw_resources(adapter); netxen_release_rx_buffers(adapter); netxen_free_sw_resources(adapter); + + if (NX_IS_REVISION_P3(adapter->ahw.revision_id)) + netxen_p3_free_mac_list(adapter); } if (adapter->portnum == 0) -- cgit From 03e678ee968ae54b79c1580c2935895bd863ad95 Mon 
Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:49:43 -0800 Subject: netxen: hold tx lock while sending firmware commands Some firmware commands like mac address addition/deletion are sent on the transmit ring. So need to hold the tx lock before touching tx producer/consumer indices. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_hw.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c index e2d2a2fdbe1..821cff68b3f 100644 --- a/drivers/net/netxen/netxen_nic_hw.c +++ b/drivers/net/netxen/netxen_nic_hw.c @@ -503,6 +503,8 @@ netxen_send_cmd_descs(struct netxen_adapter *adapter, i = 0; + netif_tx_lock_bh(adapter->netdev); + producer = adapter->cmd_producer; do { cmd_desc = &cmd_desc_arr[i]; @@ -527,6 +529,8 @@ netxen_send_cmd_descs(struct netxen_adapter *adapter, netxen_nic_update_cmd_producer(adapter, adapter->cmd_producer); + netif_tx_unlock_bh(adapter->netdev); + return 0; } -- cgit From 6f70340698333f14b1d9c9e913c5de8f66b72c55 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Wed, 14 Jan 2009 20:50:00 -0800 Subject: netxen: handle dma mapping failures o Bail out if pci_map_single() fails while replenishing rx ring. o Drop packet if pci_map_{single,page}() fail in tx. Signed-off-by: Dhananjay Phadke Signed-off-by: David S. 
Miller --- drivers/net/netxen/netxen_nic.h | 1 - drivers/net/netxen/netxen_nic_init.c | 68 +++++++++++++++++------------------- drivers/net/netxen/netxen_nic_main.c | 38 ++++++++++++++++++-- 3 files changed, 67 insertions(+), 40 deletions(-) diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h index 6598a34b87d..c11c568fd7d 100644 --- a/drivers/net/netxen/netxen_nic.h +++ b/drivers/net/netxen/netxen_nic.h @@ -860,7 +860,6 @@ struct nx_host_rds_ring { u32 skb_size; struct netxen_rx_buffer *rx_buf_arr; /* rx buffers for receive */ struct list_head free_list; - int begin_alloc; }; /* diff --git a/drivers/net/netxen/netxen_nic_init.c b/drivers/net/netxen/netxen_nic_init.c index a3203644b48..ca7c8d8050c 100644 --- a/drivers/net/netxen/netxen_nic_init.c +++ b/drivers/net/netxen/netxen_nic_init.c @@ -308,7 +308,6 @@ int netxen_alloc_sw_resources(struct netxen_adapter *adapter) } memset(rds_ring->rx_buf_arr, 0, RCV_BUFFSIZE); INIT_LIST_HEAD(&rds_ring->free_list); - rds_ring->begin_alloc = 0; /* * Now go through all of them, set reference handles * and put them in the queues. @@ -1435,7 +1434,6 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) struct rcv_desc *pdesc; struct netxen_rx_buffer *buffer; int count = 0; - int index = 0; netxen_ctx_msg msg = 0; dma_addr_t dma; struct list_head *head; @@ -1443,7 +1441,6 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) rds_ring = &recv_ctx->rds_rings[ringid]; producer = rds_ring->producer; - index = rds_ring->begin_alloc; head = &rds_ring->free_list; /* We can start writing rx descriptors into the phantom memory. 
*/ @@ -1451,39 +1448,37 @@ void netxen_post_rx_buffers(struct netxen_adapter *adapter, u32 ctx, u32 ringid) skb = dev_alloc_skb(rds_ring->skb_size); if (unlikely(!skb)) { - rds_ring->begin_alloc = index; break; } + if (!adapter->ahw.cut_through) + skb_reserve(skb, 2); + + dma = pci_map_single(pdev, skb->data, + rds_ring->dma_size, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, dma)) { + dev_kfree_skb_any(skb); + break; + } + + count++; buffer = list_entry(head->next, struct netxen_rx_buffer, list); list_del(&buffer->list); - count++; /* now there should be no failure */ - pdesc = &rds_ring->desc_head[producer]; - - if (!adapter->ahw.cut_through) - skb_reserve(skb, 2); - /* This will be setup when we receive the - * buffer after it has been filled FSL TBD TBD - * skb->dev = netdev; - */ - dma = pci_map_single(pdev, skb->data, rds_ring->dma_size, - PCI_DMA_FROMDEVICE); - pdesc->addr_buffer = cpu_to_le64(dma); buffer->skb = skb; buffer->state = NETXEN_BUFFER_BUSY; buffer->dma = dma; + /* make a rcv descriptor */ + pdesc = &rds_ring->desc_head[producer]; + pdesc->addr_buffer = cpu_to_le64(dma); pdesc->reference_handle = cpu_to_le16(buffer->ref_handle); pdesc->buffer_length = cpu_to_le32(rds_ring->dma_size); - DPRINTK(INFO, "done writing descripter\n"); - producer = - get_next_index(producer, rds_ring->max_rx_desc_count); - index = get_next_index(index, rds_ring->max_rx_desc_count); + + producer = get_next_index(producer, rds_ring->max_rx_desc_count); } /* if we did allocate buffers, then write the count to Phantom */ if (count) { - rds_ring->begin_alloc = index; rds_ring->producer = producer; /* Window = 1 */ adapter->pci_write_normalize(adapter, @@ -1522,49 +1517,50 @@ static void netxen_post_rx_buffers_nodb(struct netxen_adapter *adapter, struct rcv_desc *pdesc; struct netxen_rx_buffer *buffer; int count = 0; - int index = 0; struct list_head *head; + dma_addr_t dma; rds_ring = &recv_ctx->rds_rings[ringid]; producer = rds_ring->producer; - index = 
rds_ring->begin_alloc; head = &rds_ring->free_list; /* We can start writing rx descriptors into the phantom memory. */ while (!list_empty(head)) { skb = dev_alloc_skb(rds_ring->skb_size); if (unlikely(!skb)) { - rds_ring->begin_alloc = index; break; } + if (!adapter->ahw.cut_through) + skb_reserve(skb, 2); + + dma = pci_map_single(pdev, skb->data, + rds_ring->dma_size, PCI_DMA_FROMDEVICE); + if (pci_dma_mapping_error(pdev, dma)) { + dev_kfree_skb_any(skb); + break; + } + + count++; buffer = list_entry(head->next, struct netxen_rx_buffer, list); list_del(&buffer->list); - count++; /* now there should be no failure */ - pdesc = &rds_ring->desc_head[producer]; - if (!adapter->ahw.cut_through) - skb_reserve(skb, 2); buffer->skb = skb; buffer->state = NETXEN_BUFFER_BUSY; - buffer->dma = pci_map_single(pdev, skb->data, - rds_ring->dma_size, - PCI_DMA_FROMDEVICE); + buffer->dma = dma; /* make a rcv descriptor */ + pdesc = &rds_ring->desc_head[producer]; pdesc->reference_handle = cpu_to_le16(buffer->ref_handle); pdesc->buffer_length = cpu_to_le32(rds_ring->dma_size); pdesc->addr_buffer = cpu_to_le64(buffer->dma); - producer = - get_next_index(producer, rds_ring->max_rx_desc_count); - index = get_next_index(index, rds_ring->max_rx_desc_count); - buffer = &rds_ring->rx_buf_arr[index]; + + producer = get_next_index(producer, rds_ring->max_rx_desc_count); } /* if we did allocate buffers, then write the count to Phantom */ if (count) { - rds_ring->begin_alloc = index; rds_ring->producer = producer; /* Window = 1 */ adapter->pci_write_normalize(adapter, diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 9268fd2fbac..86867405a36 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1200,6 +1200,24 @@ static bool netxen_tso_check(struct net_device *netdev, return tso; } +static void +netxen_clean_tx_dma_mapping(struct pci_dev *pdev, + struct netxen_cmd_buffer *pbuf, int last) +{ + int k; + 
struct netxen_skb_frag *buffrag; + + buffrag = &pbuf->frag_array[0]; + pci_unmap_single(pdev, buffrag->dma, + buffrag->length, PCI_DMA_TODEVICE); + + for (k = 1; k < last; k++) { + buffrag = &pbuf->frag_array[k]; + pci_unmap_page(pdev, buffrag->dma, + buffrag->length, PCI_DMA_TODEVICE); + } +} + static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { struct netxen_adapter *adapter = netdev_priv(netdev); @@ -1208,6 +1226,8 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct netxen_cmd_buffer *pbuf; struct netxen_skb_frag *buffrag; struct cmd_desc_type0 *hwdesc; + struct pci_dev *pdev = adapter->pdev; + dma_addr_t temp_dma; int i, k; u32 producer, consumer; @@ -1240,8 +1260,12 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) pbuf->skb = skb; pbuf->frag_count = frag_count; buffrag = &pbuf->frag_array[0]; - buffrag->dma = pci_map_single(adapter->pdev, skb->data, first_seg_len, + temp_dma = pci_map_single(pdev, skb->data, first_seg_len, PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(pdev, temp_dma)) + goto drop_packet; + + buffrag->dma = temp_dma; buffrag->length = first_seg_len; netxen_set_tx_frags_len(hwdesc, frag_count, skb->len); netxen_set_tx_port(hwdesc, adapter->portnum); @@ -1253,7 +1277,6 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) struct skb_frag_struct *frag; int len, temp_len; unsigned long offset; - dma_addr_t temp_dma; /* move to next desc. 
if there is a need */ if ((i & 0x3) == 0) { @@ -1269,8 +1292,12 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) offset = frag->page_offset; temp_len = len; - temp_dma = pci_map_page(adapter->pdev, frag->page, offset, + temp_dma = pci_map_page(pdev, frag->page, offset, len, PCI_DMA_TODEVICE); + if (pci_dma_mapping_error(pdev, temp_dma)) { + netxen_clean_tx_dma_mapping(pdev, pbuf, i); + goto drop_packet; + } buffrag++; buffrag->dma = temp_dma; @@ -1345,6 +1372,11 @@ static int netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) netdev->trans_start = jiffies; return NETDEV_TX_OK; + +drop_packet: + adapter->stats.txdropped++; + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static int netxen_nic_check_temp(struct netxen_adapter *adapter) -- cgit From 2950e952920811be465ec95c6b56f03dc66a05c0 Mon Sep 17 00:00:00 2001 From: Jos-Vicente Gilabert Date: Wed, 14 Jan 2009 20:55:00 -0800 Subject: drivers/net/irda/irda-usb.c: fix buffer overflow Taken from http://bugzilla.kernel.org/show_bug.cgi?id=12397 We're doing an sprintf of an 11-char string into an 11-char buffer. Whoops. It breaks firmware uploading. Reported-by: Jos-Vicente Gilabert Signed-off-by: Andrew Morton Signed-off-by: David S. 
Miller --- drivers/net/irda/irda-usb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 29118f58a14..3a22dc41b65 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -1073,7 +1073,7 @@ static int stir421x_patch_device(struct irda_usb_cb *self) { unsigned int i; int ret; - char stir421x_fw_name[11]; + char stir421x_fw_name[12]; const struct firmware *fw; const unsigned char *fw_version_ptr; /* pointer to version string */ unsigned long fw_version = 0; -- cgit From 937f1ba56b4be37d9e2ad77412f95048662058d2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 14 Jan 2009 21:05:05 -0800 Subject: net: Add init_dummy_netdev() and fix EMAC driver using it This adds an init_dummy_netdev() function that gets a network device structure (allocation and lifetime entirely under caller's control) and initializes the minimum amount of fields so it can be used to schedule NAPI polls without registering a full blown interface. This is to be used by drivers that need to tie several hardware interfaces to a single NAPI poll scheduler due to HW limitations. It also updates the ibm_newemac driver to use that, thus fixing the oops on 2.6.29 due to passing NULL as "dev" to netif_napi_add(). Symbol is exported GPL-only as I don't think we want binary drivers doing that sort of acrobatics (if we want them at all). Signed-off-by: Benjamin Herrenschmidt Tested-by: Geert Uytterhoeven Signed-off-by: David S.
Miller --- drivers/net/ibm_newemac/mal.c | 4 +++- drivers/net/ibm_newemac/mal.h | 2 ++ include/linux/netdevice.h | 3 +++ net/core/dev.c | 39 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/drivers/net/ibm_newemac/mal.c b/drivers/net/ibm_newemac/mal.c index ecf9798987f..2a2fc17b287 100644 --- a/drivers/net/ibm_newemac/mal.c +++ b/drivers/net/ibm_newemac/mal.c @@ -613,7 +613,9 @@ static int __devinit mal_probe(struct of_device *ofdev, INIT_LIST_HEAD(&mal->list); spin_lock_init(&mal->lock); - netif_napi_add(NULL, &mal->napi, mal_poll, + init_dummy_netdev(&mal->dummy_dev); + + netif_napi_add(&mal->dummy_dev, &mal->napi, mal_poll, CONFIG_IBM_NEW_EMAC_POLL_WEIGHT); /* Load power-on reset defaults */ diff --git a/drivers/net/ibm_newemac/mal.h b/drivers/net/ibm_newemac/mal.h index 2f0a8736084..9ededfbf072 100644 --- a/drivers/net/ibm_newemac/mal.h +++ b/drivers/net/ibm_newemac/mal.h @@ -214,6 +214,8 @@ struct mal_instance { int index; spinlock_t lock; + struct net_device dummy_dev; + unsigned int features; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4647604c7ca..ec54785d34f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -795,6 +795,7 @@ struct net_device NETREG_UNREGISTERING, /* called unregister_netdevice */ NETREG_UNREGISTERED, /* completed unregister todo */ NETREG_RELEASED, /* called free_netdev */ + NETREG_DUMMY, /* dummy device for NAPI poll */ } reg_state; /* Called from unregister, can be used to call free_netdev */ @@ -1077,6 +1078,8 @@ extern void free_netdev(struct net_device *dev); extern void synchronize_net(void); extern int register_netdevice_notifier(struct notifier_block *nb); extern int unregister_netdevice_notifier(struct notifier_block *nb); +extern int init_dummy_netdev(struct net_device *dev); + extern int call_netdevice_notifiers(unsigned long val, struct net_device *dev); extern struct net_device *dev_get_by_index(struct net *net, int 
ifindex); extern struct net_device *__dev_get_by_index(struct net *net, int ifindex); diff --git a/net/core/dev.c b/net/core/dev.c index 60377b6c0a8..8d675975d85 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4430,6 +4430,45 @@ err_uninit: goto out; } +/** + * init_dummy_netdev - init a dummy network device for NAPI + * @dev: device to init + * + * This takes a network device structure and initialize the minimum + * amount of fields so it can be used to schedule NAPI polls without + * registering a full blown interface. This is to be used by drivers + * that need to tie several hardware interfaces to a single NAPI + * poll scheduler due to HW limitations. + */ +int init_dummy_netdev(struct net_device *dev) +{ + /* Clear everything. Note we don't initialize spinlocks + * are they aren't supposed to be taken by any of the + * NAPI code and this dummy netdev is supposed to be + * only ever used for NAPI polls + */ + memset(dev, 0, sizeof(struct net_device)); + + /* make sure we BUG if trying to hit standard + * register/unregister code path + */ + dev->reg_state = NETREG_DUMMY; + + /* initialize the ref count */ + atomic_set(&dev->refcnt, 1); + + /* NAPI wants this */ + INIT_LIST_HEAD(&dev->napi_list); + + /* a dummy interface is started by default */ + set_bit(__LINK_STATE_PRESENT, &dev->state); + set_bit(__LINK_STATE_START, &dev->state); + + return 0; +} +EXPORT_SYMBOL_GPL(init_dummy_netdev); + + /** * register_netdev - register a network device * @dev: device to register -- cgit From d57bc36e7aba9e3a00d154f5eff80ff596146fc4 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 14 Jan 2009 21:05:55 -0800 Subject: ax88796: start_xmit fix using net_device_ops This patch hooks up the start_xmit/tx_timeout/get_stats callbacks in the ax88796 driver since they no longer are installed by the lib8390 code. Without this patch the function dev_hard_start_xmit() crashes due to a start_xmit callback with the value NULL. 
While at it, update the ax88796 driver to make use of struct net_device_ops. Signed-off-by: Magnus Damm Signed-off-by: David S. Miller --- drivers/net/ax88796.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/drivers/net/ax88796.c b/drivers/net/ax88796.c index 337488ec707..a4eb6c40678 100644 --- a/drivers/net/ax88796.c +++ b/drivers/net/ax88796.c @@ -37,7 +37,10 @@ static int phy_debug = 0; #define __ei_open ax_ei_open #define __ei_close ax_ei_close #define __ei_poll ax_ei_poll +#define __ei_start_xmit ax_ei_start_xmit #define __ei_tx_timeout ax_ei_tx_timeout +#define __ei_get_stats ax_ei_get_stats +#define __ei_set_multicast_list ax_ei_set_multicast_list #define __ei_interrupt ax_ei_interrupt #define ____alloc_ei_netdev ax__alloc_ei_netdev #define __NS8390_init ax_NS8390_init @@ -623,6 +626,23 @@ static void ax_eeprom_register_write(struct eeprom_93cx6 *eeprom) } #endif +static const struct net_device_ops ax_netdev_ops = { + .ndo_open = ax_open, + .ndo_stop = ax_close, + .ndo_do_ioctl = ax_ioctl, + + .ndo_start_xmit = ax_ei_start_xmit, + .ndo_tx_timeout = ax_ei_tx_timeout, + .ndo_get_stats = ax_ei_get_stats, + .ndo_set_multicast_list = ax_ei_set_multicast_list, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = eth_mac_addr, + .ndo_change_mtu = eth_change_mtu, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ax_ei_poll, +#endif +}; + /* setup code */ static void ax_initial_setup(struct net_device *dev, struct ei_device *ei_local) @@ -738,9 +758,7 @@ static int ax_init_dev(struct net_device *dev, int first_init) ei_status.get_8390_hdr = &ax_get_8390_hdr; ei_status.priv = 0; - dev->open = ax_open; - dev->stop = ax_close; - dev->do_ioctl = ax_ioctl; + dev->netdev_ops = &ax_netdev_ops; dev->ethtool_ops = &ax_ethtool_ops; ax->msg_enable = NETIF_MSG_LINK; @@ -753,9 +771,6 @@ static int ax_init_dev(struct net_device *dev, int first_init) ax->mii.mdio_write = ax_phy_write; ax->mii.dev = dev;
-#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ax_ei_poll; -#endif ax_NS8390_init(dev, 0); if (first_init) -- cgit From c53a6ee88b0a91bd012ef1b7988c0b93dae6f24d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jan 2009 21:06:55 -0800 Subject: can: fix slowpath issue in hrtimer callback function Due to the loopback functionality in can_send() we can not invoke it from hardirq context which was done inside the bcm_tx_timeout_handler() hrtimer callback: [ 700.361154] [] warn_slowpath+0x80/0xb6 [ 700.361163] [] valid_state+0x125/0x136 [ 700.361171] [] mark_lock+0x18e/0x332 [ 700.361180] [] __lock_acquire+0x12e/0xb1e [ 700.361189] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361198] [] dev_queue_xmit+0x191/0x479 [ 700.361206] [] __local_bh_disable+0x2b/0x64 [ 700.361213] [] dev_queue_xmit+0x191/0x479 [ 700.361225] [] can_send+0xd7/0x11a [can] [ 700.361235] [] bcm_can_tx+0x9d/0xd9 [can_bcm] [ 700.361245] [] bcm_tx_timeout_handler+0x6a/0xbc [can_bcm] [ 700.361255] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361263] [] __run_hrtimer+0x5a/0x86 [ 700.361273] [] bcm_tx_timeout_handler+0x0/0xbc [can_bcm] [ 700.361282] [] hrtimer_interrupt+0xb9/0x110 This patch moves the rest of the functionality from the hrtimer callback to the already existing tasklet to fix this slowpath problem. Signed-off-by: Oliver Hartkopp Signed-off-by: David S. 
Miller --- net/can/bcm.c | 57 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index 1649c8ab2c2..b7c7d465113 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -347,51 +347,54 @@ static void bcm_tx_timeout_tsklet(unsigned long data) struct bcm_op *op = (struct bcm_op *)data; struct bcm_msg_head msg_head; - /* create notification to user */ - msg_head.opcode = TX_EXPIRED; - msg_head.flags = op->flags; - msg_head.count = op->count; - msg_head.ival1 = op->ival1; - msg_head.ival2 = op->ival2; - msg_head.can_id = op->can_id; - msg_head.nframes = 0; - - bcm_send_to_user(op, &msg_head, NULL, 0); -} - -/* - * bcm_tx_timeout_handler - performes cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - enum hrtimer_restart ret = HRTIMER_NORESTART; - if (op->kt_ival1.tv64 && (op->count > 0)) { op->count--; - if (!op->count && (op->flags & TX_COUNTEVT)) - tasklet_schedule(&op->tsklet); + if (!op->count && (op->flags & TX_COUNTEVT)) { + + /* create notification to user */ + msg_head.opcode = TX_EXPIRED; + msg_head.flags = op->flags; + msg_head.count = op->count; + msg_head.ival1 = op->ival1; + msg_head.ival2 = op->ival2; + msg_head.can_id = op->can_id; + msg_head.nframes = 0; + + bcm_send_to_user(op, &msg_head, NULL, 0); + } } if (op->kt_ival1.tv64 && (op->count > 0)) { /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival1); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival1), + HRTIMER_MODE_ABS); } else { if (op->kt_ival2.tv64) { /* send (next) frame */ bcm_can_tx(op); - hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); - ret = HRTIMER_RESTART; + hrtimer_start(&op->timer, + ktime_add(ktime_get(), op->kt_ival2), + HRTIMER_MODE_ABS); } } +} - return ret; +/* + * 
bcm_tx_timeout_handler - performes cyclic CAN frame transmissions + */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); + + tasklet_schedule(&op->tsklet); + + return HRTIMER_NORESTART; } /* -- cgit From 7e86c0e6850504ec9516b953f316a47277825e33 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 15 Jan 2009 10:21:23 +0100 Subject: sound: virtuoso: do not overwrite EEPROM on Xonar D2/D2X On the Asus Xonar D2 and D2X models, the SPI chip select signal for the fourth DAC shares its pin with the serial clock for the EEPROM that contains the PCI subdevice ID values. It appears that when DAC registers are written and some other unknown conditions occur (probably noise on the EEPROM's chip select line), the EEPROM gets overwritten with garbage, which makes it impossible to properly detect the card later. Therefore, we better avoid DAC register writes and make sure that the driver works with the DAC's registers' default values. Consequently, the sample format is now I2S instead of left-justified (no user-visible change), and the DAC's volume/mute registers cannot be used anymore (volume changes are now done by the software volume plugin). Signed-off-by: Clemens Ladisch Cc: Signed-off-by: Takashi Iwai --- sound/pci/oxygen/virtuoso.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/sound/pci/oxygen/virtuoso.c b/sound/pci/oxygen/virtuoso.c index 98c6a8c65d8..e9e829e83d7 100644 --- a/sound/pci/oxygen/virtuoso.c +++ b/sound/pci/oxygen/virtuoso.c @@ -26,7 +26,7 @@ * SPI 0 -> 1st PCM1796 (front) * SPI 1 -> 2nd PCM1796 (surround) * SPI 2 -> 3rd PCM1796 (center/LFE) - * SPI 4 -> 4th PCM1796 (back) + * SPI 4 -> 4th PCM1796 (back) and EEPROM self-destruct (do not use!) 
* * GPIO 2 -> M0 of CS5381 * GPIO 3 -> M1 of CS5381 @@ -207,6 +207,12 @@ static void xonar_gpio_changed(struct oxygen *chip); static inline void pcm1796_write_spi(struct oxygen *chip, unsigned int codec, u8 reg, u8 value) { + /* + * We don't want to do writes on SPI 4 because the EEPROM, which shares + * the same pin, might get confused and broken. We'd better take care + * that the driver works with the default register values ... + */ +#if 0 /* maps ALSA channel pair number to SPI output */ static const u8 codec_map[4] = { 0, 1, 2, 4 @@ -217,6 +223,7 @@ static inline void pcm1796_write_spi(struct oxygen *chip, unsigned int codec, (codec_map[codec] << OXYGEN_SPI_CODEC_SHIFT) | OXYGEN_SPI_CEN_LATCH_CLOCK_HI, (reg << 8) | value); +#endif } static inline void pcm1796_write_i2c(struct oxygen *chip, unsigned int codec, @@ -750,6 +757,9 @@ static const DECLARE_TLV_DB_SCALE(cs4362a_db_scale, -12700, 100, 0); static int xonar_d2_control_filter(struct snd_kcontrol_new *template) { + if (!strncmp(template->name, "Master Playback ", 16)) + /* disable volume/mute because they would require SPI writes */ + return 1; if (!strncmp(template->name, "CD Capture ", 11)) /* CD in is actually connected to the video in pin */ template->private_value ^= AC97_CD ^ AC97_VIDEO; @@ -840,9 +850,8 @@ static const struct oxygen_model model_xonar_d2 = { .dac_volume_min = 0x0f, .dac_volume_max = 0xff, .misc_flags = OXYGEN_MISC_MIDI, - .function_flags = OXYGEN_FUNCTION_SPI | - OXYGEN_FUNCTION_ENABLE_SPI_4_5, - .dac_i2s_format = OXYGEN_I2S_FORMAT_LJUST, + .function_flags = OXYGEN_FUNCTION_SPI, + .dac_i2s_format = OXYGEN_I2S_FORMAT_I2S, .adc_i2s_format = OXYGEN_I2S_FORMAT_LJUST, }; -- cgit From 98a4826b99bc4bcc34c604b2fc4fcf4d771600ec Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 14 Jan 2009 10:56:32 +0100 Subject: sched: fix bandwidth validation for UID grouping Impact: make rt-limit tunables work again Mark Glines reported: > I've got an issue on x86-64 where I can't configure the 
system to allow > RT tasks for a non-root user. > > In 2.6.26.5, I was able to do the following to set things up nicely: > echo 450000 >/sys/kernel/uids/0/cpu_rt_runtime > echo 450000 >/sys/kernel/uids/1000/cpu_rt_runtime > > Seems like every value I try to echo into the /sys files returns EINVAL. For UID grouping we initialize the root group with infinite bandwidth which by default is actually more than the global limit, therefore the bandwidth check always fails. Because the root group is a phantom group (for UID grouping) we cannot runtime adjust it, therefore we let it reflect the global bandwidth settings. Reported-by: Mark Glines Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/sched.c b/kernel/sched.c index 3b630d88266..ed62d1cee05 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9050,6 +9050,13 @@ static int tg_schedulable(struct task_group *tg, void *data) runtime = d->rt_runtime; } +#ifdef CONFIG_USER_SCHED + if (tg == &root_task_group) { + period = global_rt_period(); + runtime = global_rt_runtime(); + } +#endif + /* * Cannot have more runtime than the period. */ -- cgit From cce7ade803699463ecc62a065ca522004f7ccb3d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:37 +0100 Subject: sched: SCHED_IDLE weight change Increase the SCHED_IDLE weight from 2 to 3, this gives much more stable vruntime numbers. time advanced in 100ms: weight=2 64765.988352 67012.881408 88501.412352 weight=3 35496.181411 34130.971298 35497.411573 Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched.c b/kernel/sched.c index ed62d1cee05..6acfb3c2398 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1323,8 +1323,8 @@ static inline void update_load_sub(struct load_weight *lw, unsigned long dec) * slice expiry etc. 
*/ -#define WEIGHT_IDLEPRIO 2 -#define WMULT_IDLEPRIO (1 << 31) +#define WEIGHT_IDLEPRIO 3 +#define WMULT_IDLEPRIO 1431655765 /* * Nice levels are multiplicative, with a gentle 10% change for every -- cgit From 6bc912b71b6f33b041cfde93ca3f019cbaa852bc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:38 +0100 Subject: sched: SCHED_OTHER vs SCHED_IDLE isolation Stronger SCHED_IDLE isolation: - no SCHED_IDLE buddies - never let SCHED_IDLE preempt on wakeup - always preempt SCHED_IDLE on wakeup - limit SLEEPER fairness for SCHED_IDLE. Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 8e1352c7555..cdebd8089cb 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -677,9 +677,13 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) unsigned long thresh = sysctl_sched_latency; /* - * convert the sleeper threshold into virtual time + * Convert the sleeper threshold into virtual time. + * SCHED_IDLE is a special sub-class. We care about + * fairness only relative to other SCHED_IDLE tasks, + * all of which have the same weight. 
*/ - if (sched_feat(NORMALIZED_SLEEPER)) + if (sched_feat(NORMALIZED_SLEEPER) && + task_of(se)->policy != SCHED_IDLE) thresh = calc_delta_fair(thresh, se); vruntime -= thresh; @@ -1340,14 +1344,18 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) static void set_last_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->last = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->last = se; + } } static void set_next_buddy(struct sched_entity *se) { - for_each_sched_entity(se) - cfs_rq_of(se)->next = se; + if (likely(task_of(se)->policy != SCHED_IDLE)) { + for_each_sched_entity(se) + cfs_rq_of(se)->next = se; + } } /* @@ -1393,12 +1401,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) return; /* - * Batch tasks do not preempt (their preemption is driven by + * Batch and idle tasks do not preempt (their preemption is driven by * the tick): */ - if (unlikely(p->policy == SCHED_BATCH)) + if (unlikely(p->policy != SCHED_NORMAL)) return; + /* Idle tasks are by definition preempted by everybody. */ + if (unlikely(curr->policy == SCHED_IDLE)) { + resched_task(curr); + return; + } + if (!sched_feat(WAKEUP_PREEMPT)) return; -- cgit From e17036dac189dd034c092a91df56aa740db7146d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 15 Jan 2009 14:53:39 +0100 Subject: sched: fix update_min_vruntime Impact: fix SCHED_IDLE latency problems OK, so we have 1 running task A (which is obviously curr and the tree is equally obviously empty). 'A' nicely chugs along, doing its thing, carrying min_vruntime along as it goes. Then some whacko speed freak SCHED_IDLE task gets inserted due to SMP balancing, which is very likely far right, in that case update_curr update_min_vruntime cfs_rq->rb_leftmost := true (the crazy task sitting in a tree) vruntime = se->vruntime and voila, min_vruntime is waaay right of where it ought to be. 
OK, so why did I write it like that to begin with... Aah, yes. Say we've just dequeued current schedule deactivate_task(prev) dequeue_entity update_min_vruntime Then we'll set vruntime = cfs_rq->min_vruntime; we find !cfs_rq->curr, but do find someone in the tree. Then we _must_ do vruntime = se->vruntime, because vruntime = min_vruntime(vruntime := cfs_rq->min_vruntime, se->vruntime) will not advance vruntime, and cause lags the other way around (which we fixed with that initial patch: 1af5f730fc1bf7c62ec9fb2d307206e18bf40a69 (sched: more accurate min_vruntime accounting). Signed-off-by: Peter Zijlstra Tested-by: Mike Galbraith Acked-by: Mike Galbraith Cc: Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cdebd8089cb..16b419bb8b0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -283,7 +283,7 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq) struct sched_entity, run_node); - if (vruntime == cfs_rq->min_vruntime) + if (!cfs_rq->curr) vruntime = se->vruntime; else vruntime = min_vruntime(vruntime, se->vruntime); -- cgit From eff317d0834ad1ff03f747f6bc2d76b9a9c95160 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Thu, 15 Jan 2009 14:40:47 +0200 Subject: ASoC: Fix the power update function for snd_soc_dapm_value_mux Modify the check for the mux type to also handle the snd_soc_dapm_value_mux type in a same way as the snd_soc_dapm_mux. 
Signed-off-by: Peter Ujfalusi Signed-off-by: Takashi Iwai --- sound/soc/soc-dapm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 493a4e8aa27..a2f1da8b464 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -720,7 +720,8 @@ static int dapm_mux_update_power(struct snd_soc_dapm_widget *widget, struct snd_soc_dapm_path *path; int found = 0; - if (widget->id != snd_soc_dapm_mux) + if (widget->id != snd_soc_dapm_mux && + widget->id != snd_soc_dapm_value_mux) return -ENODEV; if (!snd_soc_test_bits(widget->codec, e->reg, mask, val)) -- cgit From 428549f5746c9d6135d425d076a1bed2614d58ee Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 15 Jan 2009 16:56:59 +0100 Subject: ALSA: hda - Fix invalid amp value for STAC925x The value set in the commit 2465fb6605b4f8f3964b132017bf4078d1265fe9 is actually wrong. The value range is from 0 to 0x1f while the patch sets to 0x7f. Let's fix it. Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_sigmatel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index e6c13963f21..c39deebb588 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -886,7 +886,7 @@ static struct hda_verb stac925x_core_init[] = { /* set dac0mux for dac converter */ { 0x06, AC_VERB_SET_CONNECT_SEL, 0x00}, /* unmute and set max the selector */ - { 0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb07f }, + { 0x0e, AC_VERB_SET_AMP_GAIN_MUTE, 0xb01f }, {} }; -- cgit From 1cf167f27ad2720af11ee8aa350009342f909e70 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:22:18 -0800 Subject: bnx2x: Using singlethread work queue Since slow-path events, including link update, are handled in work-queue, a race condition was introduced in the self-test that sometimes caused the link status to fail: the self-test was running under RTNL lock, and if the link-watch was scheduled it 
stoped the shared work-queue (waiting for the RTNL lock) and so the link update event was not handled until the self-test ended (releasing the RTNL lock) with failure (since the link status was not updated) Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x.h | 2 +- drivers/net/bnx2x_main.c | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index fd705d1295a..96a8889afbe 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -811,7 +811,7 @@ struct bnx2x { int pm_cap; int pcie_cap; - struct work_struct sp_task; + struct delayed_work sp_task; struct work_struct reset_task; struct timer_list timer; diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 4be05847f86..cc6ffba7459 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -95,6 +95,7 @@ MODULE_PARM_DESC(debug, "default debug msglevel"); module_param(use_multi, int, 0); MODULE_PARM_DESC(use_multi, "use per-CPU queues"); #endif +static struct workqueue_struct *bnx2x_wq; enum bnx2x_board_type { BCM57710 = 0, @@ -671,7 +672,8 @@ static void bnx2x_int_disable_sync(struct bnx2x *bp, int disable_hw) synchronize_irq(bp->pdev->irq); /* make sure sp_task is not running */ - cancel_work_sync(&bp->sp_task); + cancel_delayed_work(&bp->sp_task); + flush_workqueue(bnx2x_wq); } /* fast path */ @@ -1660,7 +1662,7 @@ static irqreturn_t bnx2x_interrupt(int irq, void *dev_instance) if (unlikely(status & 0x1)) { - schedule_work(&bp->sp_task); + queue_delayed_work(bnx2x_wq, &bp->sp_task, 0); status &= ~0x1; if (!status) @@ -2820,7 +2822,7 @@ static void bnx2x_attn_int(struct bnx2x *bp) static void bnx2x_sp_task(struct work_struct *work) { - struct bnx2x *bp = container_of(work, struct bnx2x, sp_task); + struct bnx2x *bp = container_of(work, struct bnx2x, sp_task.work); u16 status; @@ -2875,7 +2877,7 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance) return 
IRQ_HANDLED; #endif - schedule_work(&bp->sp_task); + queue_delayed_work(bnx2x_wq, &bp->sp_task, 0); return IRQ_HANDLED; } @@ -7501,7 +7503,7 @@ static int __devinit bnx2x_init_bp(struct bnx2x *bp) mutex_init(&bp->port.phy_mutex); - INIT_WORK(&bp->sp_task, bnx2x_sp_task); + INIT_DELAYED_WORK(&bp->sp_task, bnx2x_sp_task); INIT_WORK(&bp->reset_task, bnx2x_reset_task); rc = bnx2x_get_hwinfo(bp); @@ -10519,12 +10521,20 @@ static struct pci_driver bnx2x_pci_driver = { static int __init bnx2x_init(void) { + bnx2x_wq = create_singlethread_workqueue("bnx2x"); + if (bnx2x_wq == NULL) { + printk(KERN_ERR PFX "Cannot create workqueue\n"); + return -ENOMEM; + } + return pci_register_driver(&bnx2x_pci_driver); } static void __exit bnx2x_cleanup(void) { pci_unregister_driver(&bnx2x_pci_driver); + + destroy_workqueue(bnx2x_wq); } module_init(bnx2x_init); -- cgit From 58f4c4cfce5c4715b79621f0a635925c55f855d5 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:23:36 -0800 Subject: bnx2x: Missing memory barriers While working on IA64, it became clear that the following memory barriers are missing Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index cc6ffba7459..f0b2e73b87f 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1357,11 +1357,23 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp, rx_prods.cqe_prod = rx_comp_prod; rx_prods.sge_prod = rx_sge_prod; + /* + * Make sure that the BD and SGE data is updated before updating the + * producers since FW might read the BD/SGE right after the producer + * is updated. + * This is only applicable for weak-ordered memory model archs such + * as IA-64. The following barrier is also mandatory since FW will + * assumes BDs must have buffers. 
+ */ + wmb(); + for (i = 0; i < sizeof(struct tstorm_eth_rx_producers)/4; i++) REG_WR(bp, BAR_TSTRORM_INTMEM + TSTORM_RX_PRODS_OFFSET(BP_PORT(bp), FP_CL_ID(fp)) + i*4, ((u32 *)&rx_prods)[i]); + mmiowb(); /* keep prod updates ordered */ + DP(NETIF_MSG_RX_STATUS, "Wrote: bd_prod %u cqe_prod %u sge_prod %u\n", bd_prod, rx_comp_prod, rx_sge_prod); @@ -1582,7 +1594,6 @@ next_cqe: /* Update producers */ bnx2x_update_rx_prod(bp, fp, bd_prod_fw, sw_comp_prod, fp->rx_sge_prod); - mmiowb(); /* keep prod updates ordered */ fp->rx_pkt += rx_pkt; fp->rx_calls++; @@ -8729,6 +8740,8 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode, u8 link_up) tx_bd->general_data = ((UNICAST_ADDRESS << ETH_TX_BD_ETH_ADDR_TYPE_SHIFT) | 1); + wmb(); + fp->hw_tx_prods->bds_prod = cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + 1); mb(); /* FW restriction: must not reorder writing nbd and packets */ @@ -8780,7 +8793,6 @@ test_loopback_rx_exit: /* Update producers */ bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod, fp->rx_sge_prod); - mmiowb(); /* keep prod updates ordered */ test_loopback_exit: bp->link_params.loopback_mode = LOOPBACK_NONE; @@ -9707,6 +9719,15 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) DP(NETIF_MSG_TX_QUEUED, "doorbell: nbd %d bd %u\n", nbd, bd_prod); + /* + * Make sure that the BD data is updated before updating the producer + * since FW might read the BD right after the producer is updated. + * This is only applicable for weak-ordered memory model archs such + * as IA-64. The following barrier is also mandatory since FW will + * assumes packets must have BDs. 
+ */ + wmb(); + fp->hw_tx_prods->bds_prod = cpu_to_le16(le16_to_cpu(fp->hw_tx_prods->bds_prod) + nbd); mb(); /* FW restriction: must not reorder writing nbd and packets */ @@ -9720,6 +9741,9 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->trans_start = jiffies; if (unlikely(bnx2x_tx_avail(fp) < MAX_SKB_FRAGS + 3)) { + /* We want bnx2x_tx_int to "see" the updated tx_bd_prod + if we put Tx into XOFF state. */ + smp_mb(); netif_stop_queue(dev); bp->eth_stats.driver_xoff++; if (bnx2x_tx_avail(fp) >= MAX_SKB_FRAGS + 3) -- cgit From 4f40f2cba244e04c0f385c5ce60498b513b335dd Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:24:17 -0800 Subject: bnx2x: Using system page size for SGE When the page size is not 4KB, the FW must be programmed to work with the right SGE boundaries and fragment list length. To avoid confusion with the BCM_PAGE_SIZE which is set to 4KB for the FW sake, another alias for the system page size was added to explicitly indicate that it is meant for the SGE Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x.h | 3 +++ drivers/net/bnx2x_main.c | 32 ++++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index 96a8889afbe..2cd1e427828 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -150,6 +150,9 @@ struct sw_rx_page { #define PAGES_PER_SGE_SHIFT 0 #define PAGES_PER_SGE (1 << PAGES_PER_SGE_SHIFT) +#define SGE_PAGE_SIZE PAGE_SIZE +#define SGE_PAGE_SHIFT PAGE_SHIFT +#define SGE_PAGE_ALIGN(addr) PAGE_ALIGN(addr) #define BCM_RX_ETH_PAYLOAD_ALIGN 64 diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index f0b2e73b87f..75b2624cd60 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -974,7 +974,7 @@ static inline void bnx2x_free_rx_sge(struct bnx2x *bp, return; pci_unmap_page(bp->pdev, pci_unmap_addr(sw_buf, mapping), - BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); + SGE_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); __free_pages(page, PAGES_PER_SGE_SHIFT); sw_buf->page = NULL; @@ -1002,7 +1002,7 @@ static inline int bnx2x_alloc_rx_sge(struct bnx2x *bp, if (unlikely(page == NULL)) return -ENOMEM; - mapping = pci_map_page(bp->pdev, page, 0, BCM_PAGE_SIZE*PAGES_PER_SGE, + mapping = pci_map_page(bp->pdev, page, 0, SGE_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { __free_pages(page, PAGES_PER_SGE_SHIFT); @@ -1098,9 +1098,9 @@ static void bnx2x_update_sge_prod(struct bnx2x_fastpath *fp, struct eth_fast_path_rx_cqe *fp_cqe) { struct bnx2x *bp = fp->bp; - u16 sge_len = BCM_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) - + u16 sge_len = SGE_PAGE_ALIGN(le16_to_cpu(fp_cqe->pkt_len) - le16_to_cpu(fp_cqe->len_on_bd)) >> - BCM_PAGE_SHIFT; + SGE_PAGE_SHIFT; u16 last_max, last_elem, first_elem; u16 delta = 0; u16 i; @@ -1205,22 +1205,22 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, u16 cqe_idx) { struct sw_rx_page *rx_pg, old_rx_pg; - struct page 
*sge; u16 len_on_bd = le16_to_cpu(fp_cqe->len_on_bd); u32 i, frag_len, frag_size, pages; int err; int j; frag_size = le16_to_cpu(fp_cqe->pkt_len) - len_on_bd; - pages = BCM_PAGE_ALIGN(frag_size) >> BCM_PAGE_SHIFT; + pages = SGE_PAGE_ALIGN(frag_size) >> SGE_PAGE_SHIFT; /* This is needed in order to enable forwarding support */ if (frag_size) - skb_shinfo(skb)->gso_size = min((u32)BCM_PAGE_SIZE, + skb_shinfo(skb)->gso_size = min((u32)SGE_PAGE_SIZE, max(frag_size, (u32)len_on_bd)); #ifdef BNX2X_STOP_ON_ERROR - if (pages > 8*PAGES_PER_SGE) { + if (pages > + min((u32)8, (u32)MAX_SKB_FRAGS) * SGE_PAGE_SIZE * PAGES_PER_SGE) { BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n", pages, cqe_idx); BNX2X_ERR("fp_cqe->pkt_len = %d fp_cqe->len_on_bd = %d\n", @@ -1236,9 +1236,8 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, /* FW gives the indices of the SGE as if the ring is an array (meaning that "next" element will consume 2 indices) */ - frag_len = min(frag_size, (u32)(BCM_PAGE_SIZE*PAGES_PER_SGE)); + frag_len = min(frag_size, (u32)(SGE_PAGE_SIZE*PAGES_PER_SGE)); rx_pg = &fp->rx_page_ring[sge_idx]; - sge = rx_pg->page; old_rx_pg = *rx_pg; /* If we fail to allocate a substitute page, we simply stop @@ -1251,7 +1250,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp, /* Unmap the page as we r going to pass it to the stack */ pci_unmap_page(bp->pdev, pci_unmap_addr(&old_rx_pg, mapping), - BCM_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); + SGE_PAGE_SIZE*PAGES_PER_SGE, PCI_DMA_FROMDEVICE); /* Add one frag and update the appropriate fields in the skb */ skb_fill_page_desc(skb, j, old_rx_pg.page, 0, frag_len); @@ -4544,7 +4543,7 @@ static void bnx2x_set_client_config(struct bnx2x *bp) if (bp->flags & TPA_ENABLE_FLAG) { tstorm_client.max_sges_for_packet = - BCM_PAGE_ALIGN(tstorm_client.mtu) >> BCM_PAGE_SHIFT; + SGE_PAGE_ALIGN(tstorm_client.mtu) >> SGE_PAGE_SHIFT; tstorm_client.max_sges_for_packet = 
((tstorm_client.max_sges_for_packet + PAGES_PER_SGE - 1) & (~(PAGES_PER_SGE - 1))) >> @@ -4727,10 +4726,11 @@ static void bnx2x_init_internal_func(struct bnx2x *bp) bp->e1hov); } - /* Init CQ ring mapping and aggregation size */ - max_agg_size = min((u32)(bp->rx_buf_size + - 8*BCM_PAGE_SIZE*PAGES_PER_SGE), - (u32)0xffff); + /* Init CQ ring mapping and aggregation size, the FW limit is 8 frags */ + max_agg_size = + min((u32)(min((u32)8, (u32)MAX_SKB_FRAGS) * + SGE_PAGE_SIZE * PAGES_PER_SGE), + (u32)0xffff); for_each_queue(bp, i) { struct bnx2x_fastpath *fp = &bp->fp[i]; -- cgit From ad33ea3a8d2ec324dc0f46b6ae404d824d2b349b Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:24:57 -0800 Subject: bnx2x: Missing mask when calculating flow control Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 75b2624cd60..9b1555820f5 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1899,7 +1899,8 @@ static int bnx2x_set_spio(struct bnx2x *bp, int spio_num, u32 mode) static void bnx2x_calc_fc_adv(struct bnx2x *bp) { - switch (bp->link_vars.ieee_fc) { + switch (bp->link_vars.ieee_fc & + MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_MASK) { case MDIO_COMBO_IEEE0_AUTO_NEG_ADV_PAUSE_NONE: bp->port.advertising &= ~(ADVERTISED_Asym_Pause | ADVERTISED_Pause); -- cgit From 3c96c68b0c67d11b8519bc38233aec586f0211f4 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:25:31 -0800 Subject: bnx2x: Flow control updated before reporting the link Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 9b1555820f5..4f1ee1f2968 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1970,10 +1970,11 @@ static u8 bnx2x_initial_phy_init(struct bnx2x *bp) rc = bnx2x_phy_init(&bp->link_params, &bp->link_vars); bnx2x_release_phy_lock(bp); + bnx2x_calc_fc_adv(bp); + if (bp->link_vars.link_up) bnx2x_link_report(bp); - bnx2x_calc_fc_adv(bp); return rc; } -- cgit From a5e9a7cfad5fd301ce2b7869bbf386b70aa39e7c Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:26:01 -0800 Subject: bnx2x: Protecting the link change indication Without this lock, in some race conditions the driver missed link change indication Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 4f1ee1f2968..701bcc1260c 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -2234,9 +2234,7 @@ static void bnx2x_link_attn(struct bnx2x *bp) /* Make sure that we are synced with the current statistics */ bnx2x_stats_handle(bp, STATS_EVENT_STOP); - bnx2x_acquire_phy_lock(bp); bnx2x_link_update(&bp->link_params, &bp->link_vars); - bnx2x_release_phy_lock(bp); if (bp->link_vars.link_up) { @@ -2485,6 +2483,8 @@ static void bnx2x_attn_int_asserted(struct bnx2x *bp, u32 asserted) if (asserted & ATTN_HARD_WIRED_MASK) { if (asserted & ATTN_NIG_FOR_FUNC) { + bnx2x_acquire_phy_lock(bp); + /* save nig interrupt mask */ bp->nig_mask = REG_RD(bp, nig_int_mask_addr); REG_WR(bp, nig_int_mask_addr, 0); @@ -2540,8 +2540,10 @@ static void bnx2x_attn_int_asserted(struct bnx2x *bp, u32 asserted) REG_WR(bp, hc_addr, asserted); /* now set back the mask */ - if (asserted & ATTN_NIG_FOR_FUNC) + if (asserted & ATTN_NIG_FOR_FUNC) { REG_WR(bp, 
nig_int_mask_addr, bp->nig_mask); + bnx2x_release_phy_lock(bp); + } } static inline void bnx2x_attn_int_deasserted0(struct bnx2x *bp, u32 attn) -- cgit From 0c6671b0d94f706dfc20cb22d792218ba9814412 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:26:51 -0800 Subject: bnx2x: VLAN tagged packets without VLAN offload Wrong handling of tagged packet if VLAN offload is disabled caused packets to get corrupted Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x.h | 12 +++++++----- drivers/net/bnx2x_main.c | 42 +++++++++++++++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index 2cd1e427828..e7fbca7722d 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -20,6 +20,11 @@ * (you will need to reboot afterwards) */ /* #define BNX2X_STOP_ON_ERROR */ +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#define BCM_VLAN 1 +#endif + + /* error/debug prints */ #define DRV_MODULE_NAME "bnx2x" @@ -78,11 +83,6 @@ #endif -#ifdef NETIF_F_HW_VLAN_TX -#define BCM_VLAN 1 -#endif - - #define U64_LO(x) (u32)(((u64)(x)) & 0xffffffff) #define U64_HI(x) (u32)(((u64)(x)) >> 32) #define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) @@ -804,6 +804,8 @@ struct bnx2x { #define TPA_ENABLE_FLAG 0x80 #define NO_MCP_FLAG 0x100 #define BP_NOMCP(bp) (bp->flags & NO_MCP_FLAG) +#define HW_VLAN_TX_FLAG 0x400 +#define HW_VLAN_RX_FLAG 0x800 int func; #define BP_PORT(bp) (bp->func % PORT_MAX) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 701bcc1260c..ca8b25126b2 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -38,9 +38,7 @@ #include #include #include -#ifdef NETIF_F_HW_VLAN_TX - #include -#endif +#include #include #include #include @@ -1283,6 +1281,13 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, if (likely(new_skb)) { /* fix ip xsum and give it to the stack */ /* 
(no need to map the new skb) */ +#ifdef BCM_VLAN + int is_vlan_cqe = + (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) & + PARSING_FLAGS_VLAN); + int is_not_hwaccel_vlan_cqe = + (is_vlan_cqe && (!(bp->flags & HW_VLAN_RX_FLAG))); +#endif prefetch(skb); prefetch(((char *)(skb)) + 128); @@ -1307,6 +1312,12 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, struct iphdr *iph; iph = (struct iphdr *)skb->data; +#ifdef BCM_VLAN + /* If there is no Rx VLAN offloading - + take VLAN tag into an account */ + if (unlikely(is_not_hwaccel_vlan_cqe)) + iph = (struct iphdr *)((u8 *)iph + VLAN_HLEN); +#endif iph->check = 0; iph->check = ip_fast_csum((u8 *)iph, iph->ihl); } @@ -1314,9 +1325,8 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp, if (!bnx2x_fill_frag_skb(bp, fp, skb, &cqe->fast_path_cqe, cqe_idx)) { #ifdef BCM_VLAN - if ((bp->vlgrp != NULL) && - (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) & - PARSING_FLAGS_VLAN)) + if ((bp->vlgrp != NULL) && is_vlan_cqe && + (!is_not_hwaccel_vlan_cqe)) vlan_hwaccel_receive_skb(skb, bp->vlgrp, le16_to_cpu(cqe->fast_path_cqe. 
vlan_tag)); @@ -1560,7 +1570,7 @@ reuse_rx: } #ifdef BCM_VLAN - if ((bp->vlgrp != NULL) && + if ((bp->vlgrp != NULL) && (bp->flags & HW_VLAN_RX_FLAG) && (le16_to_cpu(cqe->fast_path_cqe.pars_flags.flags) & PARSING_FLAGS_VLAN)) vlan_hwaccel_receive_skb(skb, bp->vlgrp, @@ -4538,7 +4548,7 @@ static void bnx2x_set_client_config(struct bnx2x *bp) tstorm_client.config_flags = TSTORM_ETH_CLIENT_CONFIG_STATSITICS_ENABLE; #ifdef BCM_VLAN - if (bp->rx_mode && bp->vlgrp) { + if (bp->rx_mode && bp->vlgrp && (bp->flags & HW_VLAN_RX_FLAG)) { tstorm_client.config_flags |= TSTORM_ETH_CLIENT_CONFIG_VLAN_REMOVAL_ENABLE; DP(NETIF_MSG_IFUP, "vlan removal enabled\n"); @@ -9567,11 +9577,14 @@ static int bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) "sending pkt %u @%p next_idx %u bd %u @%p\n", pkt_prod, tx_buf, fp->tx_pkt_prod, bd_prod, tx_bd); - if ((bp->vlgrp != NULL) && vlan_tx_tag_present(skb)) { +#ifdef BCM_VLAN + if ((bp->vlgrp != NULL) && vlan_tx_tag_present(skb) && + (bp->flags & HW_VLAN_TX_FLAG)) { tx_bd->vlan = cpu_to_le16(vlan_tx_tag_get(skb)); tx_bd->bd_flags.as_bitfield |= ETH_TX_BD_FLAGS_VLAN_TAG; vlan_off += 4; } else +#endif tx_bd->vlan = cpu_to_le16(pkt_prod); if (xmit_type) { @@ -10017,6 +10030,16 @@ static void bnx2x_vlan_rx_register(struct net_device *dev, struct bnx2x *bp = netdev_priv(dev); bp->vlgrp = vlgrp; + + /* Set flags according to the required capabilities */ + bp->flags &= ~(HW_VLAN_RX_FLAG | HW_VLAN_TX_FLAG); + + if (dev->features & NETIF_F_HW_VLAN_TX) + bp->flags |= HW_VLAN_TX_FLAG; + + if (dev->features & NETIF_F_HW_VLAN_RX) + bp->flags |= HW_VLAN_RX_FLAG; + if (netif_running(dev)) bnx2x_set_client_config(bp); } @@ -10173,6 +10196,7 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, dev->features |= NETIF_F_HIGHDMA; #ifdef BCM_VLAN dev->features |= (NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX); + bp->flags |= (HW_VLAN_RX_FLAG | HW_VLAN_TX_FLAG); #endif dev->features |= (NETIF_F_TSO | NETIF_F_TSO_ECN); dev->features |= NETIF_F_TSO6; 
-- cgit From 68d5948436c2f782ebb5ddf25a6588ee452e8c30 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:27:36 -0800 Subject: bnx2x: Endianness issues Adding missing le_to_cpu and disabling wrong HW endianity flag (the two complete each other) Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index ca8b25126b2..d2350dd300b 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -1438,7 +1438,7 @@ static int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget) DP(NETIF_MSG_RX_STATUS, "CQE type %x err %x status %x" " queue %x vlan %x len %u\n", CQE_TYPE(cqe_fp_flags), cqe_fp_flags, cqe->fast_path_cqe.status_flags, - cqe->fast_path_cqe.rss_hash_result, + le32_to_cpu(cqe->fast_path_cqe.rss_hash_result), le16_to_cpu(cqe->fast_path_cqe.vlan_tag), le16_to_cpu(cqe->fast_path_cqe.pkt_len)); @@ -2821,8 +2821,10 @@ static void bnx2x_attn_int_deasserted(struct bnx2x *bp, u32 deasserted) static void bnx2x_attn_int(struct bnx2x *bp) { /* read local copy of bits */ - u32 attn_bits = bp->def_status_blk->atten_status_block.attn_bits; - u32 attn_ack = bp->def_status_blk->atten_status_block.attn_bits_ack; + u32 attn_bits = le32_to_cpu(bp->def_status_blk->atten_status_block. + attn_bits); + u32 attn_ack = le32_to_cpu(bp->def_status_blk->atten_status_block. 
+ attn_bits_ack); u32 attn_state = bp->attn_state; /* look for changed bits */ @@ -2870,7 +2872,7 @@ static void bnx2x_sp_task(struct work_struct *work) if (status & 0x2) bp->stats_pending = 0; - bnx2x_ack_sb(bp, DEF_SB_ID, ATTENTION_ID, bp->def_att_idx, + bnx2x_ack_sb(bp, DEF_SB_ID, ATTENTION_ID, le16_to_cpu(bp->def_att_idx), IGU_INT_NOP, 1); bnx2x_ack_sb(bp, DEF_SB_ID, USTORM_ID, le16_to_cpu(bp->def_u_idx), IGU_INT_NOP, 1); @@ -5161,7 +5163,6 @@ static int bnx2x_init_common(struct bnx2x *bp) REG_WR(bp, PXP2_REG_RQ_SRC_ENDIAN_M, 1); REG_WR(bp, PXP2_REG_RQ_CDU_ENDIAN_M, 1); REG_WR(bp, PXP2_REG_RQ_DBG_ENDIAN_M, 1); - REG_WR(bp, PXP2_REG_RQ_HC_ENDIAN_M, 1); /* REG_WR(bp, PXP2_REG_RD_PBF_SWAP_MODE, 1); */ REG_WR(bp, PXP2_REG_RD_QM_SWAP_MODE, 1); -- cgit From a5f67a04d998b0b6e4beb1de8f1247dd93ac1ff4 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:28:13 -0800 Subject: bnx2x: Fixing the doorbell size The size of the doorbell is 4KB; this bug becomes visible when using more than 8 queues Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnx2x.h b/drivers/net/bnx2x.h index e7fbca7722d..6fcccef4cf3 100644 --- a/drivers/net/bnx2x.h +++ b/drivers/net/bnx2x.h @@ -739,7 +739,7 @@ struct bnx2x { struct bnx2x_fastpath fp[MAX_CONTEXT]; void __iomem *regview; void __iomem *doorbells; -#define BNX2X_DB_SIZE (16*2048) +#define BNX2X_DB_SIZE (16*BCM_PAGE_SIZE) struct net_device *dev; struct pci_dev *pdev; -- cgit From f5ba6772f226be0266f95642c8162493246d3b79 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:29:18 -0800 Subject: bnx2x: Missing brackets Calculation bug due to missing brackets Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index d2350dd300b..a755fea996d 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -2920,7 +2920,7 @@ static irqreturn_t bnx2x_msix_sp_int(int irq, void *dev_instance) #define ADD_64(s_hi, a_hi, s_lo, a_lo) \ do { \ s_lo += a_lo; \ - s_hi += a_hi + (s_lo < a_lo) ? 1 : 0; \ + s_hi += a_hi + ((s_lo < a_lo) ? 1 : 0); \ } while (0) /* difference = minuend - subtrahend */ -- cgit From 26c8fa4d8a08b6e7a61f23339e2236218957ecc0 Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:29:55 -0800 Subject: bnx2x: Indirection table initialization index Wrong initialization of the multi-queue indirection table - it should be using the function and not the port index Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index a755fea996d..9e6aa8a1ee9 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -4524,7 +4524,7 @@ static void bnx2x_init_context(struct bnx2x *bp) static void bnx2x_init_ind_table(struct bnx2x *bp) { - int port = BP_PORT(bp); + int func = BP_FUNC(bp); int i; if (!is_multi(bp)) @@ -4533,10 +4533,8 @@ static void bnx2x_init_ind_table(struct bnx2x *bp) DP(NETIF_MSG_IFUP, "Initializing indirection table\n"); for (i = 0; i < TSTORM_INDIRECTION_TABLE_SIZE; i++) REG_WR8(bp, BAR_TSTRORM_INTMEM + - TSTORM_INDIRECTION_TABLE_OFFSET(port) + i, - i % bp->num_queues); - - REG_WR(bp, PRS_REG_A_PRSU_20, 0xf); + TSTORM_INDIRECTION_TABLE_OFFSET(func) + i, + BP_CL_ID(bp) + (i % bp->num_queues)); } static void bnx2x_set_client_config(struct bnx2x *bp) @@ -5240,6 +5238,7 @@ static int bnx2x_init_common(struct bnx2x *bp) } bnx2x_init_block(bp, PRS_COMMON_START, PRS_COMMON_END); + REG_WR(bp, PRS_REG_A_PRSU_20, 
0xf); /* set NIC mode */ REG_WR(bp, PRS_REG_NIC_MODE, 1); if (CHIP_IS_E1H(bp)) -- cgit From e7799c5f79072b5b34cf08170f142bcb8569cfff Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:30:27 -0800 Subject: bnx2x: MTU Filter Too big packets could pass due to wrong filter size Signed-off-by: Eilon Greenstein Signed-off-by: David S. Miller --- drivers/net/bnx2x_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index 9e6aa8a1ee9..b573951600d 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -4543,7 +4543,7 @@ static void bnx2x_set_client_config(struct bnx2x *bp) int port = BP_PORT(bp); int i; - tstorm_client.mtu = bp->dev->mtu + ETH_OVREHEAD; + tstorm_client.mtu = bp->dev->mtu; tstorm_client.statistics_counter_id = BP_CL_ID(bp); tstorm_client.config_flags = TSTORM_ETH_CLIENT_CONFIG_STATSITICS_ENABLE; -- cgit From 0ef00459a638ae4f5d1e5326d3e50232fa80119f Mon Sep 17 00:00:00 2001 From: Eilon Greenstein Date: Wed, 14 Jan 2009 21:31:08 -0800 Subject: bnx2x: First slow path interrupt race The "ready for interrupts" flag must be set before enabling slow-path interrupts as well (and not just before fast-path interrupts) Signed-off-by: Eilon Greenstein Signed-off-by: David S. 
Miller --- drivers/net/bnx2x_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c index b573951600d..7c533797c06 100644 --- a/drivers/net/bnx2x_main.c +++ b/drivers/net/bnx2x_main.c @@ -4812,6 +4812,15 @@ static void bnx2x_nic_init(struct bnx2x *bp, u32 load_code) bnx2x_init_context(bp); bnx2x_init_internal(bp, load_code); bnx2x_init_ind_table(bp); + bnx2x_stats_init(bp); + + /* At this point, we are ready for interrupts */ + atomic_set(&bp->intr_sem, 0); + + /* flush all before enabling interrupts */ + mb(); + mmiowb(); + bnx2x_int_enable(bp); } @@ -6420,17 +6429,8 @@ static int bnx2x_nic_load(struct bnx2x *bp, int load_mode) } } - bnx2x_stats_init(bp); - bp->state = BNX2X_STATE_OPENING_WAIT4_PORT; - /* Enable Rx interrupt handling before sending the ramrod - as it's completed on Rx FP queue */ - bnx2x_napi_enable(bp); - - /* Enable interrupt handling */ - atomic_set(&bp->intr_sem, 0); - rc = bnx2x_setup_leading(bp); if (rc) { BNX2X_ERR("Setup leading failed!\n"); -- cgit From b96ecfa689126d1e652ebd758da0b5b9b27dbd12 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:46:51 -0800 Subject: korina: fix usage of driver_data Using platform_set_drvdata() here makes no sense, since the driver_data field has already been filled with valuable data (i.e. the MAC address). Also having driver_data point to the net_device is rather pointless since struct korina_device contains an appropriate field for it. Signed-off-by: Phil Sutter Signed-off-by: David S. 
Miller --- drivers/net/korina.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 4a5580c1126..fefb33db79a 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -1089,7 +1089,6 @@ static int korina_probe(struct platform_device *pdev) return -ENOMEM; } SET_NETDEV_DEV(dev, &pdev->dev); - platform_set_drvdata(pdev, dev); lp = netdev_priv(dev); bif->dev = dev; -- cgit From a13b27826a67bfc0ca444fb42885f2069b6898e2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:47:50 -0800 Subject: korina: reset resource buffer size to 1536 The new value is the one used in the external patch before and allows at least a standard MTU of 1500 to be handled correctly. Impact of this change gets visible when bigger packets are to be received, issuing: | ping -s 492 and bigger payload sized led to 100% packet loss. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli --- drivers/net/korina.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index fefb33db79a..e30c2f437d1 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -84,7 +84,10 @@ #define KORINA_NUM_RDS 64 /* number of receive descriptors */ #define KORINA_NUM_TDS 64 /* number of transmit descriptors */ -#define KORINA_RBSIZE 536 /* size of one resource buffer = Ether MTU */ +/* KORINA_RBSIZE is the hardware's default maximum receive + * frame size in bytes. Having this hardcoded means that there + * is no support for MTU sizes greater than 1500. 
*/ +#define KORINA_RBSIZE 1536 /* size of one resource buffer = Ether MTU */ #define KORINA_RDS_MASK (KORINA_NUM_RDS - 1) #define KORINA_TDS_MASK (KORINA_NUM_TDS - 1) #define RD_RING_SIZE (KORINA_NUM_RDS * sizeof(struct dma_desc)) -- cgit From beb0babfb77eab0cbcc7f64a7b8f3545fec5c0ba Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:48:24 -0800 Subject: korina: disable napi on close and restart Without this the driver will crash when the NIC is being restarted. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/korina.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index e30c2f437d1..65b8487c189 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -904,6 +904,8 @@ static int korina_restart(struct net_device *dev) korina_free_ring(dev); + napi_disable(&lp->napi); + ret = korina_init(dev); if (ret < 0) { printk(KERN_ERR DRV_NAME "%s: cannot restart device\n", @@ -1070,6 +1072,8 @@ static int korina_close(struct net_device *dev) korina_free_ring(dev); + napi_disable(&lp->napi); + free_irq(lp->rx_irq, dev); free_irq(lp->tx_irq, dev); free_irq(lp->ovr_irq, dev); -- cgit From 4cf83b664fc14f8262d3013566ca36645f891df2 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:48:59 -0800 Subject: korina: rework korina_rx() for use with napi This function needs an early exit condition to function properly, or else caller assumes napi workload wasn't enough to handle all received packets and korina_rx is called again (and again and again and ...). Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/korina.c | 109 +++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 56 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 65b8487c189..a1d8af7d0bc 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -353,15 +353,20 @@ static int korina_rx(struct net_device *dev, int limit) struct dma_desc *rd = &lp->rd_ring[lp->rx_next_done]; struct sk_buff *skb, *skb_new; u8 *pkt_buf; - u32 devcs, pkt_len, dmas, rx_free_desc; + u32 devcs, pkt_len, dmas; int count; dma_cache_inv((u32)rd, sizeof(*rd)); for (count = 0; count < limit; count++) { + skb = lp->rx_skb[lp->rx_next_done]; + skb_new = NULL; devcs = rd->devcs; + if ((KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) == 0) + break; + /* Update statistics counters */ if (devcs & ETH_RX_CRC) dev->stats.rx_crc_errors++; @@ -384,63 +389,55 @@ static int korina_rx(struct net_device *dev, int limit) * in Rc32434 (errata ref #077) */ dev->stats.rx_errors++; dev->stats.rx_dropped++; - } - - while ((rx_free_desc = KORINA_RBSIZE - (u32)DMA_COUNT(rd->control)) != 0) { - /* init the var. used for the later - * operations within the while loop */ - skb_new = NULL; + } else if ((devcs & ETH_RX_ROK)) { pkt_len = RCVPKT_LENGTH(devcs); - skb = lp->rx_skb[lp->rx_next_done]; - - if ((devcs & ETH_RX_ROK)) { - /* must be the (first and) last - * descriptor then */ - pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; - - /* invalidate the cache */ - dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); - - /* Malloc up new buffer. 
*/ - skb_new = netdev_alloc_skb(dev, KORINA_RBSIZE + 2); - - if (!skb_new) - break; - /* Do not count the CRC */ - skb_put(skb, pkt_len - 4); - skb->protocol = eth_type_trans(skb, dev); - - /* Pass the packet to upper layers */ - netif_receive_skb(skb); - dev->stats.rx_packets++; - dev->stats.rx_bytes += pkt_len; - - /* Update the mcast stats */ - if (devcs & ETH_RX_MP) - dev->stats.multicast++; - - lp->rx_skb[lp->rx_next_done] = skb_new; - } - - rd->devcs = 0; - - /* Restore descriptor's curr_addr */ - if (skb_new) - rd->ca = CPHYSADDR(skb_new->data); - else - rd->ca = CPHYSADDR(skb->data); - - rd->control = DMA_COUNT(KORINA_RBSIZE) | - DMA_DESC_COD | DMA_DESC_IOD; - lp->rd_ring[(lp->rx_next_done - 1) & - KORINA_RDS_MASK].control &= - ~DMA_DESC_COD; - - lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK; - dma_cache_wback((u32)rd, sizeof(*rd)); - rd = &lp->rd_ring[lp->rx_next_done]; - writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); + + /* must be the (first and) last + * descriptor then */ + pkt_buf = (u8 *)lp->rx_skb[lp->rx_next_done]->data; + + /* invalidate the cache */ + dma_cache_inv((unsigned long)pkt_buf, pkt_len - 4); + + /* Malloc up new buffer. 
*/ + skb_new = netdev_alloc_skb(dev, KORINA_RBSIZE + 2); + + if (!skb_new) + break; + /* Do not count the CRC */ + skb_put(skb, pkt_len - 4); + skb->protocol = eth_type_trans(skb, dev); + + /* Pass the packet to upper layers */ + netif_receive_skb(skb); + dev->stats.rx_packets++; + dev->stats.rx_bytes += pkt_len; + + /* Update the mcast stats */ + if (devcs & ETH_RX_MP) + dev->stats.multicast++; + + lp->rx_skb[lp->rx_next_done] = skb_new; } + + rd->devcs = 0; + + /* Restore descriptor's curr_addr */ + if (skb_new) + rd->ca = CPHYSADDR(skb_new->data); + else + rd->ca = CPHYSADDR(skb->data); + + rd->control = DMA_COUNT(KORINA_RBSIZE) | + DMA_DESC_COD | DMA_DESC_IOD; + lp->rd_ring[(lp->rx_next_done - 1) & + KORINA_RDS_MASK].control &= + ~DMA_DESC_COD; + + lp->rx_next_done = (lp->rx_next_done + 1) & KORINA_RDS_MASK; + dma_cache_wback((u32)rd, sizeof(*rd)); + rd = &lp->rd_ring[lp->rx_next_done]; + writel(~DMA_STAT_DONE, &lp->rx_dma_regs->dmas); } dmas = readl(&lp->rx_dma_regs->dmas); -- cgit From 4676f63d4c1e2e3530e42cb39bf88a1c1d4d78a5 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:49:39 -0800 Subject: korina: do schedule napi after testing for it The called netif_rx_schedule() does all the work for us: - it checks the return value of netif_rx_schedule_prep() and - if everything is ok calls __netif_rx_schedule(). Before this change, the driver received absolutely nothing. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/korina.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index a1d8af7d0bc..7aa05f81fe9 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -330,7 +330,7 @@ static irqreturn_t korina_rx_dma_interrupt(int irq, void *dev_id) dmas = readl(&lp->rx_dma_regs->dmas); if (dmas & (DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR)) { - netif_rx_schedule_prep(&lp->napi); + netif_rx_schedule(&lp->napi); dmasm = readl(&lp->rx_dma_regs->dmasm); writel(dmasm | (DMA_STAT_DONE | -- cgit From 60d3f9827ca455e7272681d67a37137c328d7012 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:50:12 -0800 Subject: korina: do tx at the right position Triggering TX before the write to the DMA status mask register leads to transferring packets with maximum payload no matter what the actual packet size is. While here, also trigger RX scheduling after writing the DMA status mask register, like it was in the original driver before it was sent upstream. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index 7aa05f81fe9..dced5e71463 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -330,13 +330,13 @@ static irqreturn_t korina_rx_dma_interrupt(int irq, void *dev_id) dmas = readl(&lp->rx_dma_regs->dmas); if (dmas & (DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR)) { - netif_rx_schedule(&lp->napi); - dmasm = readl(&lp->rx_dma_regs->dmasm); writel(dmasm | (DMA_STAT_DONE | DMA_STAT_HALT | DMA_STAT_ERR), &lp->rx_dma_regs->dmasm); + netif_rx_schedule(&lp->napi); + if (dmas & DMA_STAT_ERR) printk(KERN_ERR DRV_NAME "%s: DMA error\n", dev->name); @@ -623,12 +623,12 @@ korina_tx_dma_interrupt(int irq, void *dev_id) dmas = readl(&lp->tx_dma_regs->dmas); if (dmas & (DMA_STAT_FINI | DMA_STAT_ERR)) { - korina_tx(dev); - dmasm = readl(&lp->tx_dma_regs->dmasm); writel(dmasm | (DMA_STAT_FINI | DMA_STAT_ERR), &lp->tx_dma_regs->dmasm); + korina_tx(dev); + if (lp->tx_chain_status == desc_filled && (readl(&(lp->tx_dma_regs->dmandptr)) == 0)) { writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), -- cgit From 97bc477cbc3d63f2a29c2c81031434b3a082f44c Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:50:41 -0800 Subject: korina: fix handling tx_chain_tail Originally this must have been a rewrite error when introducing 'chain_index'. But the original driver did not use the previous chain item everywhere: when altering the address tx_chain_tail points to, it should move forward, not backwards. Also this is not an "index" but rather the penultimate element in the chain, so rename it accordingly. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/korina.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index dced5e71463..f2001750251 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -199,7 +199,7 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) struct korina_private *lp = netdev_priv(dev); unsigned long flags; u32 length; - u32 chain_index; + u32 chain_prev, chain_next; struct dma_desc *td; spin_lock_irqsave(&lp->lock, flags); @@ -231,8 +231,8 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) /* Setup the transmit descriptor. */ dma_cache_inv((u32) td, sizeof(*td)); td->ca = CPHYSADDR(skb->data); - chain_index = (lp->tx_chain_tail - 1) & - KORINA_TDS_MASK; + chain_prev = (lp->tx_chain_tail - 1) & KORINA_TDS_MASK; + chain_next = (lp->tx_chain_tail + 1) & KORINA_TDS_MASK; if (readl(&(lp->tx_dma_regs->dmandptr)) == 0) { if (lp->tx_chain_status == desc_empty) { @@ -240,7 +240,7 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; /* Move tail */ - lp->tx_chain_tail = chain_index; + lp->tx_chain_tail = chain_next; /* Write to NDPTR */ writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), &lp->tx_dma_regs->dmandptr); @@ -251,12 +251,12 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; /* Link to prev */ - lp->td_ring[chain_index].control &= + lp->td_ring[chain_prev].control &= ~DMA_DESC_COF; /* Link to prev */ - lp->td_ring[chain_index].link = CPHYSADDR(td); + lp->td_ring[chain_prev].link = CPHYSADDR(td); /* Move tail */ - lp->tx_chain_tail = chain_index; + lp->tx_chain_tail = chain_next; /* Write to NDPTR */ writel(CPHYSADDR(&lp->td_ring[lp->tx_chain_head]), &(lp->tx_dma_regs->dmandptr)); @@ -270,17 +270,17 @@ static int korina_send_packet(struct sk_buff *skb, 
struct net_device *dev) td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; /* Move tail */ - lp->tx_chain_tail = chain_index; + lp->tx_chain_tail = chain_next; lp->tx_chain_status = desc_filled; netif_stop_queue(dev); } else { /* Update tail */ td->control = DMA_COUNT(length) | DMA_DESC_COF | DMA_DESC_IOF; - lp->td_ring[chain_index].control &= + lp->td_ring[chain_prev].control &= ~DMA_DESC_COF; - lp->td_ring[chain_index].link = CPHYSADDR(td); - lp->tx_chain_tail = chain_index; + lp->td_ring[chain_prev].link = CPHYSADDR(td); + lp->tx_chain_tail = chain_next; } } dma_cache_wback((u32) td, sizeof(*td)); -- cgit From 5edc7668bbece4238a32943ae7a47135af076948 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:51:15 -0800 Subject: korina: do not stop queue here Apparently this doesn't make sense. Otherwise the queue gets disabled as soon as it's getting empty and can only be resurrected by a driver restart. Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/korina.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index f2001750251..bd33fa91599 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -272,7 +272,6 @@ static int korina_send_packet(struct sk_buff *skb, struct net_device *dev) /* Move tail */ lp->tx_chain_tail = chain_next; lp->tx_chain_status = desc_filled; - netif_stop_queue(dev); } else { /* Update tail */ td->control = DMA_COUNT(length) | -- cgit From 1c5625cf0f121486abad4da0e0251ec67765aa95 Mon Sep 17 00:00:00 2001 From: Phil Sutter Date: Wed, 14 Jan 2009 21:51:48 -0800 Subject: korina: do not use IRQF_SHARED with IRQF_DISABLED As the kernel warning states: "IRQF_DISABLED is not guaranteed on shared IRQs". Since these IRQs' values are hardcoded and my test system doesn't show any shared use of IRQs at all, rather make them non-shared than non-disabled. 
Signed-off-by: Phil Sutter Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/korina.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/korina.c b/drivers/net/korina.c index bd33fa91599..1d6e48e1336 100644 --- a/drivers/net/korina.c +++ b/drivers/net/korina.c @@ -1000,14 +1000,14 @@ static int korina_open(struct net_device *dev) * that handles the Done Finished * Ovr and Und Events */ ret = request_irq(lp->rx_irq, &korina_rx_dma_interrupt, - IRQF_SHARED | IRQF_DISABLED, "Korina ethernet Rx", dev); + IRQF_DISABLED, "Korina ethernet Rx", dev); if (ret < 0) { printk(KERN_ERR DRV_NAME "%s: unable to get Rx DMA IRQ %d\n", dev->name, lp->rx_irq); goto err_release; } ret = request_irq(lp->tx_irq, &korina_tx_dma_interrupt, - IRQF_SHARED | IRQF_DISABLED, "Korina ethernet Tx", dev); + IRQF_DISABLED, "Korina ethernet Tx", dev); if (ret < 0) { printk(KERN_ERR DRV_NAME "%s: unable to get Tx DMA IRQ %d\n", dev->name, lp->tx_irq); @@ -1016,7 +1016,7 @@ static int korina_open(struct net_device *dev) /* Install handler for overrun error. */ ret = request_irq(lp->ovr_irq, &korina_ovr_interrupt, - IRQF_SHARED | IRQF_DISABLED, "Ethernet Overflow", dev); + IRQF_DISABLED, "Ethernet Overflow", dev); if (ret < 0) { printk(KERN_ERR DRV_NAME"%s: unable to get OVR IRQ %d\n", dev->name, lp->ovr_irq); @@ -1025,7 +1025,7 @@ static int korina_open(struct net_device *dev) /* Install handler for underflow error. 
*/ ret = request_irq(lp->und_irq, &korina_und_interrupt, - IRQF_SHARED | IRQF_DISABLED, "Ethernet Underflow", dev); + IRQF_DISABLED, "Ethernet Underflow", dev); if (ret < 0) { printk(KERN_ERR DRV_NAME "%s: unable to get UND IRQ %d\n", dev->name, lp->und_irq); -- cgit From 7a0b6e01434f77194b86c8051b4c3718da636c64 Mon Sep 17 00:00:00 2001 From: Isaku Yamahata Date: Thu, 15 Jan 2009 15:16:55 +0900 Subject: [IA64] Update to use account_{steal,idle}_ticks This patch fixes the following errors caused by 79741dd35713ff4f6fd0eafd59fa94e8a4ba922d which changed the prototypes of account_steal_time() and account_idle_time(). > CC arch/ia64/xen/time.o > arch/ia64/xen/time.c: In function 'consider_steal_time': > arch/ia64/xen/time.c:132: warning: passing argument 1 of 'account_steal_time' makes integer from pointer without a cast > arch/ia64/xen/time.c:132: error: too many arguments to function 'account_steal_time' > arch/ia64/xen/time.c:133: warning: passing argument 1 of 'account_steal_time' makes integer from pointer without a cast > arch/ia64/xen/time.c:133: error: too many arguments to function 'account_steal_time' Cc: Martin Schwidefsky Signed-off-by: Isaku Yamahata Signed-off-by: Tony Luck --- arch/ia64/xen/time.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ia64/xen/time.c b/arch/ia64/xen/time.c index d15a94c330f..68d6204c3f1 100644 --- a/arch/ia64/xen/time.c +++ b/arch/ia64/xen/time.c @@ -129,8 +129,8 @@ consider_steal_time(unsigned long new_itm) blocked = stolentick; if (stolen > 0 || blocked > 0) { - account_steal_time(NULL, jiffies_to_cputime(stolen)); - account_steal_time(idle_task(cpu), jiffies_to_cputime(blocked)); + account_steal_ticks(stolen); + account_idle_ticks(blocked); run_local_timers(); if (rcu_pending(cpu)) -- cgit From 9abf0eea877d6107d3a8a5c6913450e961fb7050 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:00:58 -0200 Subject: ACPI: thinkpad-acpi: update documents for the new location 
Update documentation to reflect the new location of the thinkpad-acpi driver. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- Documentation/laptops/thinkpad-acpi.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 898b4987bb8..ddc371e0a1a 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -16,7 +16,8 @@ supported by the generic Linux ACPI drivers. This driver used to be named ibm-acpi until kernel 2.6.21 and release 0.13-20070314. It used to be in the drivers/acpi tree, but it was moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel -2.6.22, and release 0.14. +2.6.22, and release 0.14. It was moved to drivers/platform/x86 for +kernel 2.6.29. The driver is named "thinkpad-acpi". In some places, like module names, "thinkpad_acpi" is used because of userspace issues. -- cgit From e0b36fc5efd610a208b6b80e821a49302ca424ab Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 11 Jan 2009 03:00:59 -0200 Subject: ACPI: thinkpad-acpi: struct device - replace bus_id with dev_name(), dev_set_name() Signed-off-by: Kay Sievers Acked-by: Greg Kroah-Hartman Acked-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 3478453eba7..ee3fa007f31 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2375,7 +2375,7 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) /* forward it to userspace, maybe it knows how to handle it */ acpi_bus_generate_netlink_event( ibm->acpi->device->pnp.device_class, - ibm->acpi->device->dev.bus_id, + dev_name(&ibm->acpi->device->dev), event, 0); return; } @@ -2505,7 +2505,7 @@ static void hotkey_notify(struct 
ibm_struct *ibm, u32 event) if (!ignore_acpi_ev && send_acpi_ev) { acpi_bus_generate_netlink_event( ibm->acpi->device->pnp.device_class, - ibm->acpi->device->dev.bus_id, + dev_name(&ibm->acpi->device->dev), event, hkey); } } @@ -3724,7 +3724,7 @@ static void dock_notify(struct ibm_struct *ibm, u32 event) } acpi_bus_generate_proc_event(ibm->acpi->device, event, data); acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, - ibm->acpi->device->dev.bus_id, + dev_name(&ibm->acpi->device->dev), event, data); } @@ -3826,7 +3826,7 @@ static void bay_notify(struct ibm_struct *ibm, u32 event) { acpi_bus_generate_proc_event(ibm->acpi->device, event, 0); acpi_bus_generate_netlink_event(ibm->acpi->device->pnp.device_class, - ibm->acpi->device->dev.bus_id, + dev_name(&ibm->acpi->device->dev), event, 0); } -- cgit From 0773a6cf673316440999752e23f8c3d4f85e48b9 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Thu, 15 Jan 2009 10:29:17 -0800 Subject: [IA64] Turn on CONFIG_HAVE_UNSTABLE_CLOCK sched_clock() on ia64 is based on ar.itc, so is never completely synchronized between cpus. On some platforms (e.g. certain models of SGI Altix) it may be running at radically different frequencies. Based on a patch from Dimitri Sivanich which set this just for SN2 && GENERIC kernels ... it is needed for all ia64 machines. 
Signed-off-by: Tony Luck --- arch/ia64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3d31636cbaf..6183aeccecf 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -17,6 +17,7 @@ config IA64 select ACPI if (!IA64_HP_SIM) select PM if (!IA64_HP_SIM) select ARCH_SUPPORTS_MSI + select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_IDE select HAVE_OPROFILE select HAVE_KPROBES -- cgit From a73f30916ee524437253739eacc682f6fb0f3ea8 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:00 -0200 Subject: ACPI: thinkpad-acpi: debug facility to emulate the rf switches This code is required to keep the thinkpad-acpi maintainer sane, and it is disabled by default. Add a debug facility to simulate an rfkill hardware rocker switch, a bluetooth rfkill soft-switch, a WWAN rfkill soft-switch on thinkpads. The simulated switches obviously do not kill any radios in hardware or firmware (unlike the real one). They also don't issue deprecated proc events. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/Kconfig | 11 ++ drivers/platform/x86/thinkpad_acpi.c | 190 +++++++++++++++++++++++++++++++++++ 2 files changed, 201 insertions(+) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index e65448e99b4..431772b8a1d 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -192,6 +192,17 @@ config THINKPAD_ACPI If you have an IBM or Lenovo ThinkPad laptop, say Y or M here. +config THINKPAD_ACPI_DEBUGFACILITIES + bool "Maintainer debug facilities" + depends on THINKPAD_ACPI + default n + ---help--- + Enables extra stuff in the thinkpad-acpi which is completely useless + for normal use. Read the driver source to find out what it does. + + Say N here, unless you were told by a kernel maintainer to do + otherwise. 
+ config THINKPAD_ACPI_DEBUG bool "Verbose debug mode" depends on THINKPAD_ACPI diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index ee3fa007f31..a086ce8ed4e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -288,6 +288,16 @@ struct tpacpi_led_classdev { unsigned int led; }; +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES +static int dbg_wlswemul; +static int tpacpi_wlsw_emulstate; +static int dbg_bluetoothemul; +static int tpacpi_bluetooth_emulstate; +static int dbg_wwanemul; +static int tpacpi_wwan_emulstate; +#endif + + /**************************************************************************** **************************************************************************** * @@ -1006,6 +1016,94 @@ static DRIVER_ATTR(version, S_IRUGO, /* --------------------------------------------------------------------- */ +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + +static void tpacpi_send_radiosw_update(void); + +/* wlsw_emulstate ------------------------------------------------------ */ +static ssize_t tpacpi_driver_wlsw_emulstate_show(struct device_driver *drv, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_wlsw_emulstate); +} + +static ssize_t tpacpi_driver_wlsw_emulstate_store(struct device_driver *drv, + const char *buf, size_t count) +{ + unsigned long t; + + if (parse_strtoul(buf, 1, &t)) + return -EINVAL; + + if (tpacpi_wlsw_emulstate != t) { + tpacpi_wlsw_emulstate = !!t; + tpacpi_send_radiosw_update(); + } else + tpacpi_wlsw_emulstate = !!t; + + return count; +} + +static DRIVER_ATTR(wlsw_emulstate, S_IWUSR | S_IRUGO, + tpacpi_driver_wlsw_emulstate_show, + tpacpi_driver_wlsw_emulstate_store); + +/* bluetooth_emulstate ------------------------------------------------- */ +static ssize_t tpacpi_driver_bluetooth_emulstate_show( + struct device_driver *drv, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_bluetooth_emulstate); +} + +static 
ssize_t tpacpi_driver_bluetooth_emulstate_store( + struct device_driver *drv, + const char *buf, size_t count) +{ + unsigned long t; + + if (parse_strtoul(buf, 1, &t)) + return -EINVAL; + + tpacpi_bluetooth_emulstate = !!t; + + return count; +} + +static DRIVER_ATTR(bluetooth_emulstate, S_IWUSR | S_IRUGO, + tpacpi_driver_bluetooth_emulstate_show, + tpacpi_driver_bluetooth_emulstate_store); + +/* wwan_emulstate ------------------------------------------------- */ +static ssize_t tpacpi_driver_wwan_emulstate_show( + struct device_driver *drv, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_wwan_emulstate); +} + +static ssize_t tpacpi_driver_wwan_emulstate_store( + struct device_driver *drv, + const char *buf, size_t count) +{ + unsigned long t; + + if (parse_strtoul(buf, 1, &t)) + return -EINVAL; + + tpacpi_wwan_emulstate = !!t; + + return count; +} + +static DRIVER_ATTR(wwan_emulstate, S_IWUSR | S_IRUGO, + tpacpi_driver_wwan_emulstate_show, + tpacpi_driver_wwan_emulstate_store); + +#endif + +/* --------------------------------------------------------------------- */ + static struct driver_attribute *tpacpi_driver_attributes[] = { &driver_attr_debug_level, &driver_attr_version, &driver_attr_interface_version, @@ -1022,6 +1120,15 @@ static int __init tpacpi_create_driver_attributes(struct device_driver *drv) i++; } +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (!res && dbg_wlswemul) + res = driver_create_file(drv, &driver_attr_wlsw_emulstate); + if (!res && dbg_bluetoothemul) + res = driver_create_file(drv, &driver_attr_bluetooth_emulstate); + if (!res && dbg_wwanemul) + res = driver_create_file(drv, &driver_attr_wwan_emulstate); +#endif + return res; } @@ -1031,6 +1138,12 @@ static void tpacpi_remove_driver_attributes(struct device_driver *drv) for (i = 0; i < ARRAY_SIZE(tpacpi_driver_attributes); i++) driver_remove_file(drv, tpacpi_driver_attributes[i]); + +#ifdef THINKPAD_ACPI_DEBUGFACILITIES + driver_remove_file(drv, 
&driver_attr_wlsw_emulstate); + driver_remove_file(drv, &driver_attr_bluetooth_emulstate); + driver_remove_file(drv, &driver_attr_wwan_emulstate); +#endif } /**************************************************************************** @@ -1216,6 +1329,12 @@ static struct attribute_set *hotkey_dev_attributes; static int hotkey_get_wlsw(int *status) { +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_wlswemul) { + *status = !!tpacpi_wlsw_emulstate; + return 0; + } +#endif if (!acpi_evalf(hkey_handle, status, "WLSW", "d")) return -EIO; return 0; @@ -2222,6 +2341,13 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) hotkey_source_mask, hotkey_poll_freq); #endif +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_wlswemul) { + tp_features.hotkey_wlsw = 1; + printk(TPACPI_INFO + "radio switch emulation enabled\n"); + } else +#endif /* Not all thinkpads have a hardware radio switch */ if (acpi_evalf(hkey_handle, &status, "WLSW", "qd")) { tp_features.hotkey_wlsw = 1; @@ -2656,6 +2782,12 @@ static int bluetooth_get_radiosw(void) if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) return RFKILL_STATE_HARD_BLOCKED; +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_bluetoothemul) + return (tpacpi_bluetooth_emulstate) ? 
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; +#endif + if (!acpi_evalf(hkey_handle, &status, "GBDC", "d")) return -EIO; @@ -2689,6 +2821,15 @@ static int bluetooth_set_radiosw(int radio_on, int update_rfk) && radio_on) return -EPERM; +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_bluetoothemul) { + tpacpi_bluetooth_emulstate = !!radio_on; + if (update_rfk) + bluetooth_update_rfk(); + return 0; + } +#endif + if (!acpi_evalf(hkey_handle, &status, "GBDC", "d")) return -EIO; if (radio_on) @@ -2792,6 +2933,13 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) str_supported(tp_features.bluetooth), status); +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_bluetoothemul) { + tp_features.bluetooth = 1; + printk(TPACPI_INFO + "bluetooth switch emulation enabled\n"); + } else +#endif if (tp_features.bluetooth && !(status & TP_ACPI_BLUETOOTH_HWPRESENT)) { /* no bluetooth hardware present in system */ @@ -2890,6 +3038,12 @@ static int wan_get_radiosw(void) if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) return RFKILL_STATE_HARD_BLOCKED; +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_wwanemul) + return (tpacpi_wwan_emulstate) ? 
+ RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; +#endif + if (!acpi_evalf(hkey_handle, &status, "GWAN", "d")) return -EIO; @@ -2923,6 +3077,15 @@ static int wan_set_radiosw(int radio_on, int update_rfk) && radio_on) return -EPERM; +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_wwanemul) { + tpacpi_wwan_emulstate = !!radio_on; + if (update_rfk) + wan_update_rfk(); + return 0; + } +#endif + if (!acpi_evalf(hkey_handle, &status, "GWAN", "d")) return -EIO; if (radio_on) @@ -3024,6 +3187,13 @@ static int __init wan_init(struct ibm_init_struct *iibm) str_supported(tp_features.wan), status); +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_wwanemul) { + tp_features.wan = 1; + printk(TPACPI_INFO + "wwan switch emulation enabled\n"); + } else +#endif if (tp_features.wan && !(status & TP_ACPI_WANCARD_HWPRESENT)) { /* no wan hardware present in system */ @@ -6701,6 +6871,26 @@ TPACPI_PARAM(brightness); TPACPI_PARAM(volume); TPACPI_PARAM(fan); +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES +module_param(dbg_wlswemul, uint, 0); +MODULE_PARM_DESC(dbg_wlswemul, "Enables WLSW emulation"); +module_param_named(wlsw_state, tpacpi_wlsw_emulstate, bool, 0); +MODULE_PARM_DESC(wlsw_state, + "Initial state of the emulated WLSW switch"); + +module_param(dbg_bluetoothemul, uint, 0); +MODULE_PARM_DESC(dbg_bluetoothemul, "Enables bluetooth switch emulation"); +module_param_named(bluetooth_state, tpacpi_bluetooth_emulstate, bool, 0); +MODULE_PARM_DESC(bluetooth_state, + "Initial state of the emulated bluetooth switch"); + +module_param(dbg_wwanemul, uint, 0); +MODULE_PARM_DESC(dbg_wwanemul, "Enables WWAN switch emulation"); +module_param_named(wwan_state, tpacpi_wwan_emulstate, bool, 0); +MODULE_PARM_DESC(wwan_state, + "Initial state of the emulated WWAN switch"); +#endif + static void thinkpad_acpi_module_exit(void) { struct ibm_struct *ibm, *itmp; -- cgit From 153f82207c51193e4d6a7e6f0e3f9442eabeba1c Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 
Jan 2009 03:01:01 -0200 Subject: ACPI: thinkpad-acpi: resume with radios disabled Instruct the firmware to not enable the radios when resuming. This is safer, and the rfkill core will take care to manually enable any radios that need to be enabled. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 66 ++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index a086ce8ed4e..b2c5913ff72 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -122,6 +122,27 @@ enum { #define TPACPI_HKEY_INPUT_PRODUCT 0x5054 /* "TP" */ #define TPACPI_HKEY_INPUT_VERSION 0x4101 +/* ACPI \WGSV commands */ +enum { + TP_ACPI_WGSV_GET_STATE = 0x01, /* Get state information */ + TP_ACPI_WGSV_PWR_ON_ON_RESUME = 0x02, /* Resume WWAN powered on */ + TP_ACPI_WGSV_PWR_OFF_ON_RESUME = 0x03, /* Resume WWAN powered off */ + TP_ACPI_WGSV_SAVE_STATE = 0x04, /* Save state for S4/S5 */ +}; + +/* TP_ACPI_WGSV_GET_STATE bits */ +enum { + TP_ACPI_WGSV_STATE_WWANEXIST = 0x0001, /* WWAN hw available */ + TP_ACPI_WGSV_STATE_WWANPWR = 0x0002, /* WWAN radio enabled */ + TP_ACPI_WGSV_STATE_WWANPWRRES = 0x0004, /* WWAN state at resume */ + TP_ACPI_WGSV_STATE_WWANBIOSOFF = 0x0008, /* WWAN disabled in BIOS */ + TP_ACPI_WGSV_STATE_BLTHEXIST = 0x0001, /* BLTH hw available */ + TP_ACPI_WGSV_STATE_BLTHPWR = 0x0002, /* BLTH radio enabled */ + TP_ACPI_WGSV_STATE_BLTHPWRRES = 0x0004, /* BLTH state at resume */ + TP_ACPI_WGSV_STATE_BLTHBIOSOFF = 0x0008, /* BLTH disabled in BIOS */ + TP_ACPI_WGSV_STATE_UWBEXIST = 0x0010, /* UWB hw available */ + TP_ACPI_WGSV_STATE_UWBPWR = 0x0020, /* UWB radio enabled */ +}; /**************************************************************************** * Main driver @@ -2766,11 +2787,28 @@ enum { /* ACPI GBDC/SBDC bits */ TP_ACPI_BLUETOOTH_HWPRESENT = 0x01, /* Bluetooth hw 
available */ TP_ACPI_BLUETOOTH_RADIOSSW = 0x02, /* Bluetooth radio enabled */ - TP_ACPI_BLUETOOTH_UNK = 0x04, /* unknown function */ + TP_ACPI_BLUETOOTH_RESUMECTRL = 0x04, /* Bluetooth state at resume: + off / last state */ +}; + +enum { + /* ACPI \BLTH commands */ + TP_ACPI_BLTH_GET_ULTRAPORT_ID = 0x00, /* Get Ultraport BT ID */ + TP_ACPI_BLTH_GET_PWR_ON_RESUME = 0x01, /* Get power-on-resume state */ + TP_ACPI_BLTH_PWR_ON_ON_RESUME = 0x02, /* Resume powered on */ + TP_ACPI_BLTH_PWR_OFF_ON_RESUME = 0x03, /* Resume powered off */ + TP_ACPI_BLTH_SAVE_STATE = 0x05, /* Save state for S4/S5 */ }; static struct rfkill *tpacpi_bluetooth_rfkill; +static void bluetooth_suspend(pm_message_t state) +{ + /* Try to make sure radio will resume powered off */ + acpi_evalf(NULL, NULL, "\\BLTH", "vd", + TP_ACPI_BLTH_PWR_OFF_ON_RESUME); +} + static int bluetooth_get_radiosw(void) { int status; @@ -2830,12 +2868,11 @@ static int bluetooth_set_radiosw(int radio_on, int update_rfk) } #endif - if (!acpi_evalf(hkey_handle, &status, "GBDC", "d")) - return -EIO; + /* We make sure to keep TP_ACPI_BLUETOOTH_RESUMECTRL off */ if (radio_on) - status |= TP_ACPI_BLUETOOTH_RADIOSSW; + status = TP_ACPI_BLUETOOTH_RADIOSSW; else - status &= ~TP_ACPI_BLUETOOTH_RADIOSSW; + status = 0; if (!acpi_evalf(hkey_handle, NULL, "SBDC", "vd", status)) return -EIO; @@ -3012,6 +3049,7 @@ static struct ibm_struct bluetooth_driver_data = { .read = bluetooth_read, .write = bluetooth_write, .exit = bluetooth_exit, + .suspend = bluetooth_suspend, }; /************************************************************************* @@ -3022,11 +3060,19 @@ enum { /* ACPI GWAN/SWAN bits */ TP_ACPI_WANCARD_HWPRESENT = 0x01, /* Wan hw available */ TP_ACPI_WANCARD_RADIOSSW = 0x02, /* Wan radio enabled */ - TP_ACPI_WANCARD_UNK = 0x04, /* unknown function */ + TP_ACPI_WANCARD_RESUMECTRL = 0x04, /* Wan state at resume: + off / last state */ }; static struct rfkill *tpacpi_wan_rfkill; +static void wan_suspend(pm_message_t state) +{ + 
/* Try to make sure radio will resume powered off */ + acpi_evalf(NULL, NULL, "\\WGSV", "qvd", + TP_ACPI_WGSV_PWR_OFF_ON_RESUME); +} + static int wan_get_radiosw(void) { int status; @@ -3086,12 +3132,11 @@ static int wan_set_radiosw(int radio_on, int update_rfk) } #endif - if (!acpi_evalf(hkey_handle, &status, "GWAN", "d")) - return -EIO; + /* We make sure to keep TP_ACPI_WANCARD_RESUMECTRL off */ if (radio_on) - status |= TP_ACPI_WANCARD_RADIOSSW; + status = TP_ACPI_WANCARD_RADIOSSW; else - status &= ~TP_ACPI_WANCARD_RADIOSSW; + status = 0; if (!acpi_evalf(hkey_handle, NULL, "SWAN", "vd", status)) return -EIO; @@ -3266,6 +3311,7 @@ static struct ibm_struct wan_driver_data = { .read = wan_read, .write = wan_write, .exit = wan_exit, + .suspend = wan_suspend, }; /************************************************************************* -- cgit From 90d9d3c79c44bcf95bc487e9bbceaff2de370310 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:02 -0200 Subject: ACPI: thinkpad-acpi: preserve radio state across shutdown Store in firmware NVRAM the radio state on machine shutdown for WWAN and bluetooth. Also, try to set the initial boot state of these radios as the rfkill default state for their respective classes. 
Signed-off-by: Henrique de Moraes Holschuh Cc: Ivo van Doorn Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 59 +++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index b2c5913ff72..27d709bac98 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -222,6 +222,7 @@ struct ibm_struct { void (*exit) (void); void (*resume) (void); void (*suspend) (pm_message_t state); + void (*shutdown) (void); struct list_head all_drivers; @@ -759,6 +760,18 @@ static int tpacpi_resume_handler(struct platform_device *pdev) return 0; } +static void tpacpi_shutdown_handler(struct platform_device *pdev) +{ + struct ibm_struct *ibm, *itmp; + + list_for_each_entry_safe(ibm, itmp, + &tpacpi_all_drivers, + all_drivers) { + if (ibm->shutdown) + (ibm->shutdown)(); + } +} + static struct platform_driver tpacpi_pdriver = { .driver = { .name = TPACPI_DRVR_NAME, @@ -766,6 +779,7 @@ static struct platform_driver tpacpi_pdriver = { }, .suspend = tpacpi_suspend_handler, .resume = tpacpi_resume_handler, + .shutdown = tpacpi_shutdown_handler, }; static struct platform_driver tpacpi_hwmon_pdriver = { @@ -957,7 +971,22 @@ static int __init tpacpi_new_rfkill(const unsigned int id, int (*get_state)(void *, enum rfkill_state *)) { int res; - enum rfkill_state initial_state; + enum rfkill_state initial_state = RFKILL_STATE_SOFT_BLOCKED; + + res = get_state(NULL, &initial_state); + if (res < 0) { + printk(TPACPI_ERR + "failed to read initial state for %s, error %d; " + "will turn radio off\n", name, res); + } else { + /* try to set the initial state as the default for the rfkill + * type, since we ask the firmware to preserve it across S5 in + * NVRAM */ + rfkill_set_default(rfktype, + (initial_state == RFKILL_STATE_UNBLOCKED) ? 
+ RFKILL_STATE_UNBLOCKED : + RFKILL_STATE_SOFT_BLOCKED); + } *rfk = rfkill_allocate(&tpacpi_pdev->dev, rfktype); if (!*rfk) { @@ -969,9 +998,7 @@ static int __init tpacpi_new_rfkill(const unsigned int id, (*rfk)->name = name; (*rfk)->get_state = get_state; (*rfk)->toggle_radio = toggle_radio; - - if (!get_state(NULL, &initial_state)) - (*rfk)->state = initial_state; + (*rfk)->state = initial_state; res = rfkill_register(*rfk); if (res < 0) { @@ -2943,8 +2970,19 @@ static int tpacpi_bluetooth_rfk_set(void *data, enum rfkill_state state) return bluetooth_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); } +static void bluetooth_shutdown(void) +{ + /* Order firmware to save current state to NVRAM */ + if (!acpi_evalf(NULL, NULL, "\\BLTH", "vd", + TP_ACPI_BLTH_SAVE_STATE)) + printk(TPACPI_NOTICE + "failed to save bluetooth state to NVRAM\n"); +} + static void bluetooth_exit(void) { + bluetooth_shutdown(); + if (tpacpi_bluetooth_rfkill) rfkill_unregister(tpacpi_bluetooth_rfkill); @@ -3050,6 +3088,7 @@ static struct ibm_struct bluetooth_driver_data = { .write = bluetooth_write, .exit = bluetooth_exit, .suspend = bluetooth_suspend, + .shutdown = bluetooth_shutdown, }; /************************************************************************* @@ -3207,8 +3246,19 @@ static int tpacpi_wan_rfk_set(void *data, enum rfkill_state state) return wan_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); } +static void wan_shutdown(void) +{ + /* Order firmware to save current state to NVRAM */ + if (!acpi_evalf(NULL, NULL, "\\WGSV", "vd", + TP_ACPI_WGSV_SAVE_STATE)) + printk(TPACPI_NOTICE + "failed to save WWAN state to NVRAM\n"); +} + static void wan_exit(void) { + wan_shutdown(); + if (tpacpi_wan_rfkill) rfkill_unregister(tpacpi_wan_rfkill); @@ -3312,6 +3362,7 @@ static struct ibm_struct wan_driver_data = { .write = wan_write, .exit = wan_exit, .suspend = wan_suspend, + .shutdown = wan_shutdown, }; /************************************************************************* -- cgit 
From 0045c0aa7d5e787f78938e6a10927b8a516f0b83 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:03 -0200 Subject: ACPI: thinkpad-acpi: add UWB radio support Add rfkill support for USB UWB radio devices on very recent ThinkPad laptop models. The new subdriver is mostly a trimmed down copy of the wwan subdriver. Signed-off-by: Henrique de Moraes Holschuh Cc: Ivo van Doorn Signed-off-by: Len Brown --- Documentation/laptops/thinkpad-acpi.txt | 18 +++ drivers/platform/x86/thinkpad_acpi.c | 206 +++++++++++++++++++++++++++++++- 2 files changed, 223 insertions(+), 1 deletion(-) diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index ddc371e0a1a..91c00010b15 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1413,6 +1413,24 @@ Sysfs notes: rfkill controller switch "tpacpi_wwan_sw": refer to Documentation/rfkill.txt for details. +EXPERIMENTAL: UWB +----------------- + +This feature is marked EXPERIMENTAL because it has not been extensively +tested and validated in various ThinkPad models yet. The feature may not +work as expected. USE WITH CAUTION! To use this feature, you need to supply +the experimental=1 parameter when loading the module. + +sysfs rfkill class: switch "tpacpi_uwb_sw" + +This feature exports an rfkill controller for the UWB device, if one is +present and enabled in the BIOS. + +Sysfs notes: + + rfkill controller switch "tpacpi_uwb_sw": refer to + Documentation/rfkill.txt for details. 
+ Multiple Commands, Module Parameters ------------------------------------ diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 27d709bac98..c1d40410ad7 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -169,6 +169,7 @@ enum { enum { TPACPI_RFK_BLUETOOTH_SW_ID = 0, TPACPI_RFK_WWAN_SW_ID, + TPACPI_RFK_UWB_SW_ID, }; /* Debugging */ @@ -261,6 +262,7 @@ static struct { u32 bright_16levels:1; u32 bright_acpimode:1; u32 wan:1; + u32 uwb:1; u32 fan_ctrl_status_undef:1; u32 input_device_registered:1; u32 platform_drv_registered:1; @@ -317,6 +319,8 @@ static int dbg_bluetoothemul; static int tpacpi_bluetooth_emulstate; static int dbg_wwanemul; static int tpacpi_wwan_emulstate; +static int dbg_uwbemul; +static int tpacpi_uwb_emulstate; #endif @@ -967,6 +971,7 @@ static int __init tpacpi_new_rfkill(const unsigned int id, struct rfkill **rfk, const enum rfkill_type rfktype, const char *name, + const bool set_default, int (*toggle_radio)(void *, enum rfkill_state), int (*get_state)(void *, enum rfkill_state *)) { @@ -978,7 +983,7 @@ static int __init tpacpi_new_rfkill(const unsigned int id, printk(TPACPI_ERR "failed to read initial state for %s, error %d; " "will turn radio off\n", name, res); - } else { + } else if (set_default) { /* try to set the initial state as the default for the rfkill * type, since we ask the firmware to preserve it across S5 in * NVRAM */ @@ -1148,6 +1153,31 @@ static DRIVER_ATTR(wwan_emulstate, S_IWUSR | S_IRUGO, tpacpi_driver_wwan_emulstate_show, tpacpi_driver_wwan_emulstate_store); +/* uwb_emulstate ------------------------------------------------- */ +static ssize_t tpacpi_driver_uwb_emulstate_show( + struct device_driver *drv, + char *buf) +{ + return snprintf(buf, PAGE_SIZE, "%d\n", !!tpacpi_uwb_emulstate); +} + +static ssize_t tpacpi_driver_uwb_emulstate_store( + struct device_driver *drv, + const char *buf, size_t count) +{ + unsigned long t; + + if 
(parse_strtoul(buf, 1, &t)) + return -EINVAL; + + tpacpi_uwb_emulstate = !!t; + + return count; +} + +static DRIVER_ATTR(uwb_emulstate, S_IWUSR | S_IRUGO, + tpacpi_driver_uwb_emulstate_show, + tpacpi_driver_uwb_emulstate_store); #endif /* --------------------------------------------------------------------- */ @@ -1175,6 +1205,8 @@ static int __init tpacpi_create_driver_attributes(struct device_driver *drv) res = driver_create_file(drv, &driver_attr_bluetooth_emulstate); if (!res && dbg_wwanemul) res = driver_create_file(drv, &driver_attr_wwan_emulstate); + if (!res && dbg_uwbemul) + res = driver_create_file(drv, &driver_attr_uwb_emulstate); #endif return res; @@ -1191,6 +1223,7 @@ static void tpacpi_remove_driver_attributes(struct device_driver *drv) driver_remove_file(drv, &driver_attr_wlsw_emulstate); driver_remove_file(drv, &driver_attr_bluetooth_emulstate); driver_remove_file(drv, &driver_attr_wwan_emulstate); + driver_remove_file(drv, &driver_attr_uwb_emulstate); #endif } @@ -2125,6 +2158,7 @@ static struct attribute *hotkey_mask_attributes[] __initdata = { static void bluetooth_update_rfk(void); static void wan_update_rfk(void); +static void uwb_update_rfk(void); static void tpacpi_send_radiosw_update(void) { int wlsw; @@ -2134,6 +2168,8 @@ static void tpacpi_send_radiosw_update(void) bluetooth_update_rfk(); if (tp_features.wan) wan_update_rfk(); + if (tp_features.uwb) + uwb_update_rfk(); if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&wlsw)) { mutex_lock(&tpacpi_inputdev_send_mutex); @@ -3035,6 +3071,7 @@ static int __init bluetooth_init(struct ibm_init_struct *iibm) &tpacpi_bluetooth_rfkill, RFKILL_TYPE_BLUETOOTH, "tpacpi_bluetooth_sw", + true, tpacpi_bluetooth_rfk_set, tpacpi_bluetooth_rfk_get); if (res) { @@ -3309,6 +3346,7 @@ static int __init wan_init(struct ibm_init_struct *iibm) &tpacpi_wan_rfkill, RFKILL_TYPE_WWAN, "tpacpi_wwan_sw", + true, tpacpi_wan_rfk_set, tpacpi_wan_rfk_get); if (res) { @@ -3365,6 +3403,162 @@ static struct ibm_struct 
wan_driver_data = { .shutdown = wan_shutdown, }; +/************************************************************************* + * UWB subdriver + */ + +enum { + /* ACPI GUWB/SUWB bits */ + TP_ACPI_UWB_HWPRESENT = 0x01, /* UWB hw available */ + TP_ACPI_UWB_RADIOSSW = 0x02, /* UWB radio enabled */ +}; + +static struct rfkill *tpacpi_uwb_rfkill; + +static int uwb_get_radiosw(void) +{ + int status; + + if (!tp_features.uwb) + return -ENODEV; + + /* WLSW overrides UWB in firmware/hardware, reflect that */ + if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status) + return RFKILL_STATE_HARD_BLOCKED; + +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_uwbemul) + return (tpacpi_uwb_emulstate) ? + RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; +#endif + + if (!acpi_evalf(hkey_handle, &status, "GUWB", "d")) + return -EIO; + + return ((status & TP_ACPI_UWB_RADIOSSW) != 0) ? + RFKILL_STATE_UNBLOCKED : RFKILL_STATE_SOFT_BLOCKED; +} + +static void uwb_update_rfk(void) +{ + int status; + + if (!tpacpi_uwb_rfkill) + return; + + status = uwb_get_radiosw(); + if (status < 0) + return; + rfkill_force_state(tpacpi_uwb_rfkill, status); +} + +static int uwb_set_radiosw(int radio_on, int update_rfk) +{ + int status; + + if (!tp_features.uwb) + return -ENODEV; + + /* WLSW overrides UWB in firmware/hardware, but there is no + * reason to risk weird behaviour. */ + if (tp_features.hotkey_wlsw && !hotkey_get_wlsw(&status) && !status + && radio_on) + return -EPERM; + +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_uwbemul) { + tpacpi_uwb_emulstate = !!radio_on; + if (update_rfk) + uwb_update_rfk(); + return 0; + } +#endif + + status = (radio_on) ? 
TP_ACPI_UWB_RADIOSSW : 0; + if (!acpi_evalf(hkey_handle, NULL, "SUWB", "vd", status)) + return -EIO; + + if (update_rfk) + uwb_update_rfk(); + + return 0; +} + +/* --------------------------------------------------------------------- */ + +static int tpacpi_uwb_rfk_get(void *data, enum rfkill_state *state) +{ + int uwbs = uwb_get_radiosw(); + + if (uwbs < 0) + return uwbs; + + *state = uwbs; + return 0; +} + +static int tpacpi_uwb_rfk_set(void *data, enum rfkill_state state) +{ + return uwb_set_radiosw((state == RFKILL_STATE_UNBLOCKED), 0); +} + +static void uwb_exit(void) +{ + if (tpacpi_uwb_rfkill) + rfkill_unregister(tpacpi_uwb_rfkill); +} + +static int __init uwb_init(struct ibm_init_struct *iibm) +{ + int res; + int status = 0; + + vdbg_printk(TPACPI_DBG_INIT, "initializing uwb subdriver\n"); + + TPACPI_ACPIHANDLE_INIT(hkey); + + tp_features.uwb = hkey_handle && + acpi_evalf(hkey_handle, &status, "GUWB", "qd"); + + vdbg_printk(TPACPI_DBG_INIT, "uwb is %s, status 0x%02x\n", + str_supported(tp_features.uwb), + status); + +#ifdef CONFIG_THINKPAD_ACPI_DEBUGFACILITIES + if (dbg_uwbemul) { + tp_features.uwb = 1; + printk(TPACPI_INFO + "uwb switch emulation enabled\n"); + } else +#endif + if (tp_features.uwb && + !(status & TP_ACPI_UWB_HWPRESENT)) { + /* no uwb hardware present in system */ + tp_features.uwb = 0; + dbg_printk(TPACPI_DBG_INIT, + "uwb hardware not installed\n"); + } + + if (!tp_features.uwb) + return 1; + + res = tpacpi_new_rfkill(TPACPI_RFK_UWB_SW_ID, + &tpacpi_uwb_rfkill, + RFKILL_TYPE_UWB, + "tpacpi_uwb_sw", + false, + tpacpi_uwb_rfk_set, + tpacpi_uwb_rfk_get); + + return res; +} + +static struct ibm_struct uwb_driver_data = { + .name = "uwb", + .exit = uwb_exit, + .flags.experimental = 1, +}; + /************************************************************************* * Video subdriver */ @@ -6830,6 +7024,10 @@ static struct ibm_init_struct ibms_init[] __initdata = { .init = wan_init, .data = &wan_driver_data, }, + { + .init = uwb_init, + .data = 
&uwb_driver_data, + }, #ifdef CONFIG_THINKPAD_ACPI_VIDEO { .init = video_init, @@ -6986,6 +7184,12 @@ MODULE_PARM_DESC(dbg_wwanemul, "Enables WWAN switch emulation"); module_param_named(wwan_state, tpacpi_wwan_emulstate, bool, 0); MODULE_PARM_DESC(wwan_state, "Initial state of the emulated WWAN switch"); + +module_param(dbg_uwbemul, uint, 0); +MODULE_PARM_DESC(dbg_uwbemul, "Enables UWB switch emulation"); +module_param_named(uwb_state, tpacpi_uwb_emulstate, bool, 0); +MODULE_PARM_DESC(uwb_state, + "Initial state of the emulated UWB switch"); #endif static void thinkpad_acpi_module_exit(void) -- cgit From 88fc241f54459ac3d86c5e13b449730199f66061 Mon Sep 17 00:00:00 2001 From: Doug Chapman Date: Thu, 15 Jan 2009 10:38:56 -0800 Subject: [IA64] dump stack on kernel unaligned warnings Often the cause of kernel unaligned access warnings is not obvious from just the ip displayed in the warning. This adds the option via proc to dump the stack in addition to the warning. The default is off (just display the 1 line warning). To enable the stack to be shown: echo 1 > /proc/sys/kernel/unaligned-dump-stack Signed-off-by: Doug Chapman Signed-off-by: Tony Luck --- arch/ia64/kernel/unaligned.c | 6 +++++- kernel/sysctl.c | 9 +++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index ff0e7c10faa..6db08599ebb 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -59,6 +59,7 @@ dump (const char *str, void *vp, size_t len) * (i.e. don't allow attacker to fill up logs with unaligned accesses). 
*/ int no_unaligned_warning; +int unaligned_dump_stack; static int noprint_warning; /* @@ -1371,9 +1372,12 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) } } } else { - if (within_logging_rate_limit()) + if (within_logging_rate_limit()) { printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n", ifa, regs->cr_iip + ipsr->ri); + if (unaligned_dump_stack) + dump_stack(); + } set_fs(KERNEL_DS); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e38b74b612..368d1638ee7 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -144,6 +144,7 @@ extern int acct_parm[]; #ifdef CONFIG_IA64 extern int no_unaligned_warning; +extern int unaligned_dump_stack; #endif #ifdef CONFIG_RT_MUTEXES @@ -781,6 +782,14 @@ static struct ctl_table kern_table[] = { .mode = 0644, .proc_handler = &proc_dointvec, }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "unaligned-dump-stack", + .data = &unaligned_dump_stack, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #endif #ifdef CONFIG_DETECT_SOFTLOCKUP { -- cgit From 7646ea88af80a92f2775e17d4283830d7f09ea2d Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:04 -0200 Subject: ACPI: thinkpad-acpi: use killable instead of interruptible mutexes Unfortunately, POSIX in all of its braindamage, does not state that userspace has to deal with EINTR in read/write and friends... so, lesser code just doesn't. Switch from *_interruptible to *_killable on the sysfs- and procfs-related mutexes. This closes this possible can of worms. 
Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c1d40410ad7..7670c8ee63d 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -1878,7 +1878,7 @@ static ssize_t hotkey_mask_show(struct device *dev, { int res; - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; res = hotkey_mask_get(); mutex_unlock(&hotkey_mutex); @@ -1897,7 +1897,7 @@ static ssize_t hotkey_mask_store(struct device *dev, if (parse_strtoul(buf, 0xffffffffUL, &t)) return -EINVAL; - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; res = hotkey_mask_set(t); @@ -1983,7 +1983,7 @@ static ssize_t hotkey_source_mask_store(struct device *dev, ((t & ~TPACPI_HKEY_NVRAM_KNOWN_MASK) != 0)) return -EINVAL; - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; HOTKEY_CONFIG_CRITICAL_START @@ -2018,7 +2018,7 @@ static ssize_t hotkey_poll_freq_store(struct device *dev, if (parse_strtoul(buf, 25, &t)) return -EINVAL; - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; hotkey_poll_freq = t; @@ -2754,7 +2754,7 @@ static int hotkey_read(char *p) return len; } - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; res = hotkey_status_get(&status); if (!res) @@ -2785,7 +2785,7 @@ static int hotkey_write(char *buf) if (!tp_features.hotkey) return -ENODEV; - if (mutex_lock_interruptible(&hotkey_mutex)) + if (mutex_lock_killable(&hotkey_mutex)) return -ERESTARTSYS; status = -1; @@ -5311,7 +5311,7 @@ static int brightness_set(int value) value < 0) return -EINVAL; - res = 
mutex_lock_interruptible(&brightness_mutex); + res = mutex_lock_killable(&brightness_mutex); if (res < 0) return res; @@ -5849,7 +5849,7 @@ static int fan_get_status_safe(u8 *status) int rc; u8 s; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; rc = fan_get_status(&s); if (!rc) @@ -5932,7 +5932,7 @@ static int fan_set_level_safe(int level) if (!fan_control_allowed) return -EPERM; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; if (level == TPACPI_FAN_LAST_LEVEL) @@ -5954,7 +5954,7 @@ static int fan_set_enable(void) if (!fan_control_allowed) return -EPERM; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; switch (fan_control_access_mode) { @@ -6009,7 +6009,7 @@ static int fan_set_disable(void) if (!fan_control_allowed) return -EPERM; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; rc = 0; @@ -6047,7 +6047,7 @@ static int fan_set_speed(int speed) if (!fan_control_allowed) return -EPERM; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; rc = 0; @@ -6249,7 +6249,7 @@ static ssize_t fan_pwm1_store(struct device *dev, /* scale down from 0-255 to 0-7 */ newlevel = (s >> 5) & 0x07; - if (mutex_lock_interruptible(&fan_mutex)) + if (mutex_lock_killable(&fan_mutex)) return -ERESTARTSYS; rc = fan_get_status(&status); -- cgit From 3827e7a3fd03718d4d204c66d9e3ab9b125ae552 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:05 -0200 Subject: ACPI: thinkpad-acpi: clean up hotkey_notify() Clean up the hotkey_notify() handler, which handles the HKEY notifications from the ACPI firmware. It was getting too long and deep. No functional changes. 
Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 174 ++++++++++++++++++++++------------- 1 file changed, 109 insertions(+), 65 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 7670c8ee63d..d833ee689f9 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2571,13 +2571,100 @@ err_exit: return (res < 0)? res : 1; } +static bool hotkey_notify_hotkey(const u32 hkey, + bool *send_acpi_ev, + bool *ignore_acpi_ev) +{ + /* 0x1000-0x1FFF: key presses */ + unsigned int scancode = hkey & 0xfff; + *send_acpi_ev = true; + *ignore_acpi_ev = false; + + if (scancode > 0 && scancode < 0x21) { + scancode--; + if (!(hotkey_source_mask & (1 << scancode))) { + tpacpi_input_send_key(scancode); + *send_acpi_ev = false; + } else { + *ignore_acpi_ev = true; + } + return true; + } + return false; +} + +static bool hotkey_notify_wakeup(const u32 hkey, + bool *send_acpi_ev, + bool *ignore_acpi_ev) +{ + /* 0x2000-0x2FFF: Wakeup reason */ + *send_acpi_ev = true; + *ignore_acpi_ev = false; + + switch (hkey) { + case 0x2304: /* suspend, undock */ + case 0x2404: /* hibernation, undock */ + hotkey_wakeup_reason = TP_ACPI_WAKEUP_UNDOCK; + *ignore_acpi_ev = true; + break; + + case 0x2305: /* suspend, bay eject */ + case 0x2405: /* hibernation, bay eject */ + hotkey_wakeup_reason = TP_ACPI_WAKEUP_BAYEJ; + *ignore_acpi_ev = true; + break; + + default: + return false; + } + + if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) { + printk(TPACPI_INFO + "woke up due to a hot-unplug " + "request...\n"); + hotkey_wakeup_reason_notify_change(); + } + return true; +} + +static bool hotkey_notify_usrevent(const u32 hkey, + bool *send_acpi_ev, + bool *ignore_acpi_ev) +{ + /* 0x5000-0x5FFF: human interface helpers */ + *send_acpi_ev = true; + *ignore_acpi_ev = false; + + switch (hkey) { + case 0x5010: /* Lenovo new BIOS: brightness changed */ + case 
0x500b: /* X61t: tablet pen inserted into bay */ + case 0x500c: /* X61t: tablet pen removed from bay */ + return true; + + case 0x5009: /* X41t-X61t: swivel up (tablet mode) */ + case 0x500a: /* X41t-X61t: swivel down (normal mode) */ + tpacpi_input_send_tabletsw(); + hotkey_tablet_mode_notify_change(); + *send_acpi_ev = false; + return true; + + case 0x5001: + case 0x5002: + /* LID switch events. Do not propagate */ + *ignore_acpi_ev = true; + return true; + + default: + return false; + } +} + static void hotkey_notify(struct ibm_struct *ibm, u32 event) { u32 hkey; - unsigned int scancode; - int send_acpi_ev; - int ignore_acpi_ev; - int unk_ev; + bool send_acpi_ev; + bool ignore_acpi_ev; + bool known_ev; if (event != 0x80) { printk(TPACPI_ERR @@ -2601,105 +2688,62 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) return; } - send_acpi_ev = 1; - ignore_acpi_ev = 0; - unk_ev = 0; + send_acpi_ev = true; + ignore_acpi_ev = false; switch (hkey >> 12) { case 1: /* 0x1000-0x1FFF: key presses */ - scancode = hkey & 0xfff; - if (scancode > 0 && scancode < 0x21) { - scancode--; - if (!(hotkey_source_mask & (1 << scancode))) { - tpacpi_input_send_key(scancode); - send_acpi_ev = 0; - } else { - ignore_acpi_ev = 1; - } - } else { - unk_ev = 1; - } + known_ev = hotkey_notify_hotkey(hkey, &send_acpi_ev, + &ignore_acpi_ev); break; case 2: - /* Wakeup reason */ - switch (hkey) { - case 0x2304: /* suspend, undock */ - case 0x2404: /* hibernation, undock */ - hotkey_wakeup_reason = TP_ACPI_WAKEUP_UNDOCK; - ignore_acpi_ev = 1; - break; - case 0x2305: /* suspend, bay eject */ - case 0x2405: /* hibernation, bay eject */ - hotkey_wakeup_reason = TP_ACPI_WAKEUP_BAYEJ; - ignore_acpi_ev = 1; - break; - default: - unk_ev = 1; - } - if (hotkey_wakeup_reason != TP_ACPI_WAKEUP_NONE) { - printk(TPACPI_INFO - "woke up due to a hot-unplug " - "request...\n"); - hotkey_wakeup_reason_notify_change(); - } + /* 0x2000-0x2FFF: Wakeup reason */ + known_ev = hotkey_notify_wakeup(hkey, 
&send_acpi_ev, + &ignore_acpi_ev); break; case 3: - /* bay-related wakeups */ + /* 0x3000-0x3FFF: bay-related wakeups */ if (hkey == 0x3003) { hotkey_autosleep_ack = 1; printk(TPACPI_INFO "bay ejected\n"); hotkey_wakeup_hotunplug_complete_notify_change(); + known_ev = true; } else { - unk_ev = 1; + known_ev = false; } break; case 4: - /* dock-related wakeups */ + /* 0x4000-0x4FFF: dock-related wakeups */ if (hkey == 0x4003) { hotkey_autosleep_ack = 1; printk(TPACPI_INFO "undocked\n"); hotkey_wakeup_hotunplug_complete_notify_change(); + known_ev = true; } else { - unk_ev = 1; + known_ev = false; } break; case 5: /* 0x5000-0x5FFF: human interface helpers */ - switch (hkey) { - case 0x5010: /* Lenovo new BIOS: brightness changed */ - case 0x500b: /* X61t: tablet pen inserted into bay */ - case 0x500c: /* X61t: tablet pen removed from bay */ - break; - case 0x5009: /* X41t-X61t: swivel up (tablet mode) */ - case 0x500a: /* X41t-X61t: swivel down (normal mode) */ - tpacpi_input_send_tabletsw(); - hotkey_tablet_mode_notify_change(); - send_acpi_ev = 0; - break; - case 0x5001: - case 0x5002: - /* LID switch events. Do not propagate */ - ignore_acpi_ev = 1; - break; - default: - unk_ev = 1; - } + known_ev = hotkey_notify_usrevent(hkey, &send_acpi_ev, + &ignore_acpi_ev); break; case 7: /* 0x7000-0x7FFF: misc */ if (tp_features.hotkey_wlsw && hkey == 0x7000) { tpacpi_send_radiosw_update(); send_acpi_ev = 0; + known_ev = true; break; } /* fallthrough to default */ default: - unk_ev = 1; + known_ev = false; } - if (unk_ev) { + if (!known_ev) { printk(TPACPI_NOTICE "unhandled HKEY event 0x%04x\n", hkey); } -- cgit From a6a3bb5c88d706c5efe0c86b3b669ac9ee012b3f Mon Sep 17 00:00:00 2001 From: Brent Casavant Date: Wed, 10 Dec 2008 09:46:16 -0600 Subject: [IA64] generic_defconfig: Enable SATA_VITESSE CONFIG_SATA_VITESSE=y was not added to generic_defconfig when sn2_defconfig was removed. 
SGI Altix systems that use an IO10 base IO card to drive the root device are unable to boot without the Vitesse controller. Signed-off-by: Brent Casavant Signed-off-by: Robin Holt Signed-off-by: Jes Sorensen Signed-off-by: Tony Luck --- arch/ia64/configs/generic_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig index 27eb67604c5..a109db30ce5 100644 --- a/arch/ia64/configs/generic_defconfig +++ b/arch/ia64/configs/generic_defconfig @@ -578,7 +578,7 @@ CONFIG_ATA_PIIX=y # CONFIG_SATA_SIS is not set # CONFIG_SATA_ULI is not set # CONFIG_SATA_VIA is not set -# CONFIG_SATA_VITESSE is not set +CONFIG_SATA_VITESSE=y # CONFIG_SATA_INIC162X is not set # CONFIG_PATA_ACPI is not set # CONFIG_PATA_ALI is not set -- cgit From 175add1981e53d22caba8f42d5f924a4de507b6c Mon Sep 17 00:00:00 2001 From: John Keller Date: Mon, 24 Nov 2008 16:47:17 -0600 Subject: [IA64] SN specific version of dma_get_required_mask() Create a platform specific version of dma_get_required_mask() for ia64 SN Altix. All SN Altix platforms support 64 bit DMA addressing regardless of the size of system memory. Create an ia64 machvec for dma_get_required_mask, with the SN version unconditionally returning DMA_64BIT_MASK. Signed-off-by: John Keller Signed-off-by: Tony Luck --- Documentation/DMA-API.txt | 9 ++++----- arch/ia64/include/asm/dma-mapping.h | 2 ++ arch/ia64/include/asm/machvec.h | 7 +++++++ arch/ia64/include/asm/machvec_init.h | 1 + arch/ia64/include/asm/machvec_sn2.h | 2 ++ arch/ia64/pci/pci.c | 27 +++++++++++++++++++++++++++ arch/ia64/sn/pci/pci_dma.c | 6 ++++++ 7 files changed, 49 insertions(+), 5 deletions(-) diff --git a/Documentation/DMA-API.txt b/Documentation/DMA-API.txt index b462bb14954..52441694fe0 100644 --- a/Documentation/DMA-API.txt +++ b/Documentation/DMA-API.txt @@ -170,16 +170,15 @@ Returns: 0 if successful and a negative error if not. 
u64 dma_get_required_mask(struct device *dev) -After setting the mask with dma_set_mask(), this API returns the -actual mask (within that already set) that the platform actually -requires to operate efficiently. Usually this means the returned mask +This API returns the mask that the platform requires to +operate efficiently. Usually this means the returned mask is the minimum required to cover all of memory. Examining the required mask gives drivers with variable descriptor sizes the opportunity to use smaller descriptors as necessary. Requesting the required mask does not alter the current mask. If you -wish to take advantage of it, you should issue another dma_set_mask() -call to lower the mask again. +wish to take advantage of it, you should issue a dma_set_mask() +call to set the mask to the value returned. Part Id - Streaming DMA mappings diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index bbab7e2b0fc..1f912d92758 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -9,6 +9,8 @@ #include #include +#define ARCH_HAS_DMA_GET_REQUIRED_MASK + struct dma_mapping_ops { int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index 59c17e44668..fe87b212170 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -62,6 +62,7 @@ typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t typedef void ia64_mv_dma_unmap_single_attrs (struct device *, dma_addr_t, size_t, int, struct dma_attrs *); typedef int ia64_mv_dma_map_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); typedef void ia64_mv_dma_unmap_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); +typedef u64 ia64_mv_dma_get_required_mask (struct device *); /* * WARNING: The legacy I/O space is _architected_. 
Platforms are @@ -159,6 +160,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *); # define platform_dma_sync_sg_for_device ia64_mv.dma_sync_sg_for_device # define platform_dma_mapping_error ia64_mv.dma_mapping_error # define platform_dma_supported ia64_mv.dma_supported +# define platform_dma_get_required_mask ia64_mv.dma_get_required_mask # define platform_irq_to_vector ia64_mv.irq_to_vector # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq # define platform_pci_get_legacy_mem ia64_mv.pci_get_legacy_mem @@ -213,6 +215,7 @@ struct ia64_machine_vector { ia64_mv_dma_sync_sg_for_device *dma_sync_sg_for_device; ia64_mv_dma_mapping_error *dma_mapping_error; ia64_mv_dma_supported *dma_supported; + ia64_mv_dma_get_required_mask *dma_get_required_mask; ia64_mv_irq_to_vector *irq_to_vector; ia64_mv_local_vector_to_irq *local_vector_to_irq; ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem; @@ -263,6 +266,7 @@ struct ia64_machine_vector { platform_dma_sync_sg_for_device, \ platform_dma_mapping_error, \ platform_dma_supported, \ + platform_dma_get_required_mask, \ platform_irq_to_vector, \ platform_local_vector_to_irq, \ platform_pci_get_legacy_mem, \ @@ -366,6 +370,9 @@ extern void machvec_init_from_cmdline(const char *cmdline); #ifndef platform_dma_supported # define platform_dma_supported swiotlb_dma_supported #endif +#ifndef platform_dma_get_required_mask +# define platform_dma_get_required_mask ia64_dma_get_required_mask +#endif #ifndef platform_irq_to_vector # define platform_irq_to_vector __ia64_irq_to_vector #endif diff --git a/arch/ia64/include/asm/machvec_init.h b/arch/ia64/include/asm/machvec_init.h index ef964b28684..37a469849ab 100644 --- a/arch/ia64/include/asm/machvec_init.h +++ b/arch/ia64/include/asm/machvec_init.h @@ -3,6 +3,7 @@ extern ia64_mv_send_ipi_t ia64_send_ipi; extern ia64_mv_global_tlb_purge_t ia64_global_tlb_purge; +extern ia64_mv_dma_get_required_mask ia64_dma_get_required_mask; extern ia64_mv_irq_to_vector 
__ia64_irq_to_vector; extern ia64_mv_local_vector_to_irq __ia64_local_vector_to_irq; extern ia64_mv_pci_get_legacy_mem_t ia64_pci_get_legacy_mem; diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h index 781308ea7b8..f1a6e0d6dfa 100644 --- a/arch/ia64/include/asm/machvec_sn2.h +++ b/arch/ia64/include/asm/machvec_sn2.h @@ -67,6 +67,7 @@ extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device; extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; extern ia64_mv_dma_mapping_error sn_dma_mapping_error; extern ia64_mv_dma_supported sn_dma_supported; +extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; extern ia64_mv_migrate_t sn_migrate; extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; extern ia64_mv_setup_msi_irq_t sn_setup_msi_irq; @@ -123,6 +124,7 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus; #define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device #define platform_dma_mapping_error sn_dma_mapping_error #define platform_dma_supported sn_dma_supported +#define platform_dma_get_required_mask sn_dma_get_required_mask #define platform_migrate sn_migrate #define platform_kernel_launch_event sn_kernel_launch_event #ifdef CONFIG_PCI_MSI diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c index 211fcfd115f..61f1af5c23c 100644 --- a/arch/ia64/pci/pci.c +++ b/arch/ia64/pci/pci.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -748,6 +749,32 @@ static void __init set_pci_cacheline_size(void) pci_cache_line_size = (1 << cci.pcci_line_size) / 4; } +u64 ia64_dma_get_required_mask(struct device *dev) +{ + u32 low_totalram = ((max_pfn - 1) << PAGE_SHIFT); + u32 high_totalram = ((max_pfn - 1) >> (32 - PAGE_SHIFT)); + u64 mask; + + if (!high_totalram) { + /* convert to mask just covering totalram */ + low_totalram = (1 << (fls(low_totalram) - 1)); + low_totalram += low_totalram - 1; + mask = low_totalram; + } else { + high_totalram = (1 << 
(fls(high_totalram) - 1)); + high_totalram += high_totalram - 1; + mask = (((u64)high_totalram) << 32) + 0xffffffff; + } + return mask; +} +EXPORT_SYMBOL_GPL(ia64_dma_get_required_mask); + +u64 dma_get_required_mask(struct device *dev) +{ + return platform_dma_get_required_mask(dev); +} +EXPORT_SYMBOL_GPL(dma_get_required_mask); + static int __init pcibios_init(void) { set_pci_cacheline_size(); diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c index 53ebb648449..863f5017baa 100644 --- a/arch/ia64/sn/pci/pci_dma.c +++ b/arch/ia64/sn/pci/pci_dma.c @@ -356,6 +356,12 @@ int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) } EXPORT_SYMBOL(sn_dma_mapping_error); +u64 sn_dma_get_required_mask(struct device *dev) +{ + return DMA_64BIT_MASK; +} +EXPORT_SYMBOL_GPL(sn_dma_get_required_mask); + char *sn_pci_get_legacy_mem(struct pci_bus *bus) { if (!SN_PCIBUS_BUSSOFT(bus)) -- cgit From 106b4e6657e10831f35c32afa26d9c11e6312783 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:06 -0200 Subject: ACPI: thinkpad-acpi: handle HKEY thermal and battery alarms Handle some HKEY events that are actually firmware alarms. For now, we do the simple thing: log specific messages to the log and let the thinkpad-specific event pass to userspace. In the future, these events will be migrated to generic notifications and subsystems. These alarms are NOT available on all ThinkPads. E.g. the T43 only issues 0x6011 and 0x6012. 
Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 63 +++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 4 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d833ee689f9..c4e4b641d95 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -174,10 +174,12 @@ enum { /* Debugging */ #define TPACPI_LOG TPACPI_FILE ": " -#define TPACPI_ERR KERN_ERR TPACPI_LOG -#define TPACPI_NOTICE KERN_NOTICE TPACPI_LOG -#define TPACPI_INFO KERN_INFO TPACPI_LOG -#define TPACPI_DEBUG KERN_DEBUG TPACPI_LOG +#define TPACPI_ALERT KERN_ALERT TPACPI_LOG +#define TPACPI_CRIT KERN_CRIT TPACPI_LOG +#define TPACPI_ERR KERN_ERR TPACPI_LOG +#define TPACPI_NOTICE KERN_NOTICE TPACPI_LOG +#define TPACPI_INFO KERN_INFO TPACPI_LOG +#define TPACPI_DEBUG KERN_DEBUG TPACPI_LOG #define TPACPI_DBG_ALL 0xffff #define TPACPI_DBG_INIT 0x0001 @@ -2614,6 +2616,15 @@ static bool hotkey_notify_wakeup(const u32 hkey, *ignore_acpi_ev = true; break; + case 0x2313: /* Battery on critical low level (S3) */ + case 0x2413: /* Battery on critical low level (S4) */ + printk(TPACPI_ALERT + "EMERGENCY WAKEUP: battery almost empty\n"); + /* how to auto-heal: */ + /* 2313: woke up from S3, go to S4/S5 */ + /* 2413: woke up from S4, go to S5 */ + break; + default: return false; } @@ -2659,6 +2670,45 @@ static bool hotkey_notify_usrevent(const u32 hkey, } } +static bool hotkey_notify_thermal(const u32 hkey, + bool *send_acpi_ev, + bool *ignore_acpi_ev) +{ + /* 0x6000-0x6FFF: thermal alarms */ + *send_acpi_ev = true; + *ignore_acpi_ev = false; + + switch (hkey) { + case 0x6011: + printk(TPACPI_CRIT + "THERMAL ALARM: battery is too hot!\n"); + /* recommended action: warn user through gui */ + return true; + case 0x6012: + printk(TPACPI_ALERT + "THERMAL EMERGENCY: battery is extremely hot!\n"); + /* recommended action: immediate sleep/hibernate */ + return 
true; + case 0x6021: + printk(TPACPI_CRIT + "THERMAL ALARM: " + "a sensor reports something is too hot!\n"); + /* recommended action: warn user through gui, that */ + /* some internal component is too hot */ + return true; + case 0x6022: + printk(TPACPI_ALERT + "THERMAL EMERGENCY: " + "a sensor reports something is extremely hot!\n"); + /* recommended action: immediate sleep/hibernate */ + return true; + default: + printk(TPACPI_ALERT + "THERMAL ALERT: unknown thermal alarm received\n"); + return false; + } +} + static void hotkey_notify(struct ibm_struct *ibm, u32 event) { u32 hkey; @@ -2731,6 +2781,11 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) known_ev = hotkey_notify_usrevent(hkey, &send_acpi_ev, &ignore_acpi_ev); break; + case 6: + /* 0x6000-0x6FFF: thermal alarms */ + known_ev = hotkey_notify_thermal(hkey, &send_acpi_ev, + &ignore_acpi_ev); + break; case 7: /* 0x7000-0x7FFF: misc */ if (tp_features.hotkey_wlsw && hkey == 0x7000) { -- cgit From cb4293589855714b6d5079336019bf2af5fc41f8 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:07 -0200 Subject: ACPI: thinkpad-acpi: start the event hunt season Ask users to tell us about any unhandled events they find. 
Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index c4e4b641d95..d7d41ae2f29 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2801,6 +2801,9 @@ static void hotkey_notify(struct ibm_struct *ibm, u32 event) if (!known_ev) { printk(TPACPI_NOTICE "unhandled HKEY event 0x%04x\n", hkey); + printk(TPACPI_NOTICE + "please report the conditions when this " + "event happened to %s\n", TPACPI_MAIL); } /* Legacy events */ -- cgit From 1c2ece758a36b48133717e4db060fbe8fa52c5cd Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:08 -0200 Subject: ACPI: thinkpad-acpi: clean-up fan subdriver quirk Better document the Uninitialized HFSP quirk, and modularize it a bit. This makes the code flow easier to read and reduces LOC. Apply the Uninitialized HFSP closer to the source (i.e. inside the get_fan_status()), this fixes a harmless buglet where at driver init with the quirk active, the user could set the hwmon pwm1 attribute and switch out of pwm1_mode=2 to pwm1_mode=0 without changing pwm1_mode directly. Signed-off-by: Henrique de Moraes Holschuh Cc: Tino Keitel Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 109 +++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 51 deletions(-) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index d7d41ae2f29..213219d938e 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -5896,6 +5896,60 @@ TPACPI_HANDLE(sfan, ec, "SFAN", /* 570 */ "JFNS", /* 770x-JL */ ); /* all others */ +/* + * Unitialized HFSP quirk: ACPI DSDT and EC fail to initialize the + * HFSP register at boot, so it contains 0x07 but the Thinkpad could + * be in auto mode (0x80).
+ * + * This is corrected by any write to HFSP either by the driver, or + * by the firmware. + * + * We assume 0x07 really means auto mode while this quirk is active, + * as this is far more likely than the ThinkPad being in level 7, + * which is only used by the firmware during thermal emergencies. + */ + +static void fan_quirk1_detect(void) +{ + /* In some ThinkPads, neither the EC nor the ACPI + * DSDT initialize the HFSP register, and it ends up + * being initially set to 0x07 when it *could* be + * either 0x07 or 0x80. + * + * Enable for TP-1Y (T43), TP-78 (R51e), + * TP-76 (R52), TP-70 (T43, R52), which are known + * to be buggy. */ + if (fan_control_initial_status == 0x07) { + switch (thinkpad_id.ec_model) { + case 0x5931: /* TP-1Y */ + case 0x3837: /* TP-78 */ + case 0x3637: /* TP-76 */ + case 0x3037: /* TP-70 */ + printk(TPACPI_NOTICE + "fan_init: initial fan status is unknown, " + "assuming it is in auto mode\n"); + tp_features.fan_ctrl_status_undef = 1; + ;; + } + } +} + +static void fan_quirk1_handle(u8 *fan_status) +{ + if (unlikely(tp_features.fan_ctrl_status_undef)) { + if (*fan_status != fan_control_initial_status) { + /* something changed the HFSP regisnter since + * driver init time, so it is not undefined + * anymore */ + tp_features.fan_ctrl_status_undef = 0; + } else { + /* Return most likely status. In fact, it + * might be the only possible status */ + *fan_status = TP_EC_FAN_AUTO; + } + } +} + /* * Call with fan_mutex held */ @@ -5934,8 +5988,10 @@ static int fan_get_status(u8 *status) if (unlikely(!acpi_ec_read(fan_status_offset, &s))) return -EIO; - if (likely(status)) + if (likely(status)) { *status = s; + fan_quirk1_handle(status); + } break; @@ -6245,16 +6301,6 @@ static ssize_t fan_pwm1_enable_show(struct device *dev, if (res) return res; - if (unlikely(tp_features.fan_ctrl_status_undef)) { - if (status != fan_control_initial_status) { - tp_features.fan_ctrl_status_undef = 0; - } else { - /* Return most likely status. 
In fact, it - * might be the only possible status */ - status = TP_EC_FAN_AUTO; - } - } - if (status & TP_EC_FAN_FULLSPEED) { mode = 0; } else if (status & TP_EC_FAN_AUTO) { @@ -6319,14 +6365,6 @@ static ssize_t fan_pwm1_show(struct device *dev, if (res) return res; - if (unlikely(tp_features.fan_ctrl_status_undef)) { - if (status != fan_control_initial_status) { - tp_features.fan_ctrl_status_undef = 0; - } else { - status = TP_EC_FAN_AUTO; - } - } - if ((status & (TP_EC_FAN_AUTO | TP_EC_FAN_FULLSPEED)) != 0) status = fan_control_desired_level; @@ -6458,29 +6496,7 @@ static int __init fan_init(struct ibm_init_struct *iibm) if (likely(acpi_ec_read(fan_status_offset, &fan_control_initial_status))) { fan_status_access_mode = TPACPI_FAN_RD_TPEC; - - /* In some ThinkPads, neither the EC nor the ACPI - * DSDT initialize the fan status, and it ends up - * being set to 0x07 when it *could* be either - * 0x07 or 0x80. - * - * Enable for TP-1Y (T43), TP-78 (R51e), - * TP-76 (R52), TP-70 (T43, R52), which are known - * to be buggy. */ - if (fan_control_initial_status == 0x07) { - switch (thinkpad_id.ec_model) { - case 0x5931: /* TP-1Y */ - case 0x3837: /* TP-78 */ - case 0x3637: /* TP-76 */ - case 0x3037: /* TP-70 */ - printk(TPACPI_NOTICE - "fan_init: initial fan status " - "is unknown, assuming it is " - "in auto mode\n"); - tp_features.fan_ctrl_status_undef = 1; - ;; - } - } + fan_quirk1_detect(); } else { printk(TPACPI_ERR "ThinkPad ACPI EC access misbehaving, " @@ -6669,15 +6685,6 @@ static int fan_read(char *p) if (rc < 0) return rc; - if (unlikely(tp_features.fan_ctrl_status_undef)) { - if (status != fan_control_initial_status) - tp_features.fan_ctrl_status_undef = 0; - else - /* Return most likely status. In fact, it - * might be the only possible status */ - status = TP_EC_FAN_AUTO; - } - len += sprintf(p + len, "status:\t\t%s\n", (status != 0) ? 
"enabled" : "disabled"); -- cgit From 54926ce8d2db7ebcbc4b80aae2cec571cd793e46 Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:09 -0200 Subject: ACPI: thinkpad-acpi: handle HKEY event 6030 HKEY event 0x6030 is a helper for Lenovo's Advanced Thermal Management Windows driver, which is, of course, completely undocumented. Silence any warnings about it being an unknown alarm, and report it unmodified for userspace. Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- drivers/platform/x86/thinkpad_acpi.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 213219d938e..886a4306e78 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -2702,6 +2702,12 @@ static bool hotkey_notify_thermal(const u32 hkey, "a sensor reports something is extremely hot!\n"); /* recommended action: immediate sleep/hibernate */ return true; + case 0x6030: + printk(TPACPI_INFO + "EC reports that Thermal Table has changed\n"); + /* recommended action: do nothing, we don't have + * Lenovo ATM information */ + return true; default: printk(TPACPI_ALERT "THERMAL ALERT: unknown thermal alarm received\n"); -- cgit From aa2fbcec07b0d594808bc3058692395d24eba66e Mon Sep 17 00:00:00 2001 From: Henrique de Moraes Holschuh Date: Sun, 11 Jan 2009 03:01:10 -0200 Subject: ACPI: thinkpad-acpi: bump up version to 0.22 It is about time to bump up the version. Features added since 0.21: fan suspend/resume support, preserve radio state across power off (for some radio types), built-in UWB radio rfkill support and thermal alarm events support. 
Signed-off-by: Henrique de Moraes Holschuh Signed-off-by: Len Brown --- Documentation/laptops/thinkpad-acpi.txt | 6 +++--- drivers/platform/x86/thinkpad_acpi.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt index 91c00010b15..41bc99fa188 100644 --- a/Documentation/laptops/thinkpad-acpi.txt +++ b/Documentation/laptops/thinkpad-acpi.txt @@ -1,7 +1,7 @@ ThinkPad ACPI Extras Driver - Version 0.21 - May 29th, 2008 + Version 0.22 + November 23rd, 2008 Borislav Deianov Henrique de Moraes Holschuh @@ -17,7 +17,7 @@ This driver used to be named ibm-acpi until kernel 2.6.21 and release 0.13-20070314. It used to be in the drivers/acpi tree, but it was moved to the drivers/misc tree and renamed to thinkpad-acpi for kernel 2.6.22, and release 0.14. It was moved to drivers/platform/x86 for -kernel 2.6.29. +kernel 2.6.29 and release 0.22. The driver is named "thinkpad-acpi". In some places, like module names, "thinkpad_acpi" is used because of userspace issues. diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 886a4306e78..bcbc05107ba 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -21,7 +21,7 @@ * 02110-1301, USA. */ -#define TPACPI_VERSION "0.21" +#define TPACPI_VERSION "0.22" #define TPACPI_SYSFS_VERSION 0x020200 /* -- cgit From 6272d68cc6a5f90c6b1a2228cf0f67b895305d17 Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 15 Jan 2009 17:17:15 +0100 Subject: sched: sched_slice() fixlet Mike's change: 0a582440f "sched: fix sched_slice())" broke group scheduling by forgetting to reload cfs_rq on each loop. This patch fixes aim7 regression and specjbb2005 regression becomes less than 1.5% on 8-core stokley. 
Signed-off-by: Lin Ming Signed-off-by: Peter Zijlstra Tested-by: Jayson King Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 16b419bb8b0..5cc1c162044 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -429,7 +429,10 @@ static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se) u64 slice = __sched_period(cfs_rq->nr_running + !se->on_rq); for_each_sched_entity(se) { - struct load_weight *load = &cfs_rq->load; + struct load_weight *load; + + cfs_rq = cfs_rq_of(se); + load = &cfs_rq->load; if (unlikely(!se->on_rq)) { struct load_weight lw = cfs_rq->load; -- cgit From c0253eec2a8e1140dd9672a1efe095cdcf5811d0 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Jan 2009 13:30:25 +0000 Subject: tty: Fix race in the flush for some ldiscs If you issue an ioctl to flush a tty as the line discipline is changing or otherwise unplugged you can get a crash. The bug is very old but the rest of the BKL lock dropping and some very "good" luck on Ingo's part caught an example. 
Use the correct ldisc_ref form so that we wait for the ldisc change to complete and then flush Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/char/tty_ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index a408c8e487e..6f4c7d0a53b 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -1057,7 +1057,7 @@ int tty_perform_flush(struct tty_struct *tty, unsigned long arg) if (retval) return retval; - ld = tty_ldisc_ref(tty); + ld = tty_ldisc_ref_wait(tty); switch (arg) { case TCIFLUSH: if (ld && ld->ops->flush_buffer) -- cgit From 25cf9bc1fcb085daaeb82b09bab0fb3c40570887 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 15 Jan 2009 13:30:34 +0000 Subject: 8250_pci: add support for netmos 9835 IBM devices Most of netmos 9835 hardware is handled by parport-serial. IBM introduces a device which doesn't have any parallel ports and have screwed subdevice PCI id (not corresponding to port numbers). Handle this device (9710:9835 1014:0299) properly. Signed-off-by: Jiri Slaby Signed-off-by: Andrew Morton Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/8250_pci.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index c088146b751..2a3671233b1 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -602,6 +602,10 @@ static int pci_netmos_init(struct pci_dev *dev) /* subdevice 0x00PS means

parallel, serial */ unsigned int num_serial = dev->subsystem_device & 0xf; + if (dev->subsystem_vendor == PCI_VENDOR_ID_IBM && + dev->subsystem_device == 0x0299) + return 0; + if (num_serial == 0) return -ENODEV; return num_serial; @@ -3096,6 +3100,10 @@ static struct pci_device_id serial_pci_tbl[] = { 0, pbn_b0_8_115200 }, + { PCI_VENDOR_ID_NETMOS, PCI_DEVICE_ID_NETMOS_9835, + PCI_VENDOR_ID_IBM, 0x0299, + 0, 0, pbn_b0_bt_2_115200 }, + /* * These entries match devices with class COMMUNICATION_SERIAL, * COMMUNICATION_MODEM or COMMUNICATION_MULTISERIAL -- cgit From 7fdd4f76e9a289592d020538f1837a7541ea89ff Mon Sep 17 00:00:00 2001 From: Daniel Gagnon Date: Thu, 15 Jan 2009 13:30:45 +0000 Subject: serial: Add SupraExpress 336i PnP Voice Modem Add SupraExpress 336i PnP Voice Modem Tested and working with the following device: (output from lspnp -v) 01:01.00 SUP1381 (unknown) state = active io 0x2f8-0x2ff irq 3 Signed-off-by: Daniel Gagnon Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/8250_pnp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/serial/8250_pnp.c b/drivers/serial/8250_pnp.c index fde7f9ccf57..bbcfc26a3b6 100644 --- a/drivers/serial/8250_pnp.c +++ b/drivers/serial/8250_pnp.c @@ -270,6 +270,8 @@ static const struct pnp_device_id pnp_dev_table[] = { { "RSS0250", 0 }, /* SupraExpress 28.8 Data/Fax PnP modem */ { "SUP1310", 0 }, + /* SupraExpress 336i PnP Voice Modem */ + { "SUP1381", 0 }, /* SupraExpress 33.6 Data/Fax PnP modem */ { "SUP1421", 0 }, /* SupraExpress 33.6 Data/Fax PnP modem */ -- cgit From cd1e40f0981e22018373307cd4087dc876b08fb0 Mon Sep 17 00:00:00 2001 From: Mischa Jonker Date: Thu, 15 Jan 2009 13:30:56 +0000 Subject: When a break signal is detected, the next character should be ignored. This was not implemented correctly for the pnx8xxx_uart driver. 
[From further discussion: Correct, you can look to it as two separate bugs: a) the next character is not ignored while it should; b) the status bits 31-8 are copied to the 'ch' variable while they shouldn't. Both bugs prevent correct break signal handling (and therefore correct behaviour of the magic SysRq key). Bug b didn't cause too much trouble earlier because in most situations the status bits are all zero; for this case they unfortunately aren't. ] Signed-off-by: Mischa Jonker Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/serial/pnx8xxx_uart.c | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/drivers/serial/pnx8xxx_uart.c b/drivers/serial/pnx8xxx_uart.c index 22e30d21225..1bb8f1b4576 100644 --- a/drivers/serial/pnx8xxx_uart.c +++ b/drivers/serial/pnx8xxx_uart.c @@ -187,7 +187,7 @@ static void pnx8xxx_rx_chars(struct pnx8xxx_port *sport) status = FIFO_TO_SM(serial_in(sport, PNX8XXX_FIFO)) | ISTAT_TO_SM(serial_in(sport, PNX8XXX_ISTAT)); while (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFIFO)) { - ch = serial_in(sport, PNX8XXX_FIFO); + ch = serial_in(sport, PNX8XXX_FIFO) & 0xff; sport->port.icount.rx++; @@ -198,9 +198,16 @@ static void pnx8xxx_rx_chars(struct pnx8xxx_port *sport) * out of the main execution path */ if (status & (FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE | - PNX8XXX_UART_FIFO_RXPAR) | + PNX8XXX_UART_FIFO_RXPAR | + PNX8XXX_UART_FIFO_RXBRK) | ISTAT_TO_SM(PNX8XXX_UART_INT_RXOVRN))) { - if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR)) + if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXBRK)) { + status &= ~(FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE) | + FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR)); + sport->port.icount.brk++; + if (uart_handle_break(&sport->port)) + goto ignore_char; + } else if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXPAR)) sport->port.icount.parity++; else if (status & FIFO_TO_SM(PNX8XXX_UART_FIFO_RXFE)) sport->port.icount.frame++; @@ -284,14 +291,8 @@ static irqreturn_t pnx8xxx_int(int irq, void 
*dev_id) /* Get the interrupts */ status = serial_in(sport, PNX8XXX_ISTAT) & serial_in(sport, PNX8XXX_IEN); - /* Break signal received */ - if (status & PNX8XXX_UART_INT_BREAK) { - sport->port.icount.brk++; - uart_handle_break(&sport->port); - } - - /* Byte received */ - if (status & PNX8XXX_UART_INT_RX) + /* Byte or break signal received */ + if (status & (PNX8XXX_UART_INT_RX | PNX8XXX_UART_INT_BREAK)) pnx8xxx_rx_chars(sport); /* TX holding register empty - transmit a byte */ -- cgit From a9fec7102f01f6464b19e13ffd9d8c48663379ad Mon Sep 17 00:00:00 2001 From: Jim Paris Date: Thu, 15 Jan 2009 13:31:07 +0000 Subject: ftdi_sio: fix kref leak Commit 4a90f09b20f4622dcbff1f0e1e6bae1704f8ad8c added kref stuff to ftdi_sio, but missed tty_kref_put at one exit point in ftdi_process_read. Signed-off-by: Jim Paris Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/usb/serial/ftdi_sio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index ef6cfa5a447..c70a8f667d8 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -2030,7 +2030,7 @@ static void ftdi_process_read(struct work_struct *work) spin_unlock_irqrestore(&priv->rx_lock, flags); dbg("%s - deferring remainder until unthrottled", __func__); - return; + goto out; } spin_unlock_irqrestore(&priv->rx_lock, flags); /* if the port is closed stop trying to read */ -- cgit From fe41cbb164a0dc55f3914a0e4cabe8240410157c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Thu, 15 Jan 2009 13:31:15 +0000 Subject: tty: Fix a kref leak in the HSO driver on re-open Signed-off-by: Linus Torvalds --- drivers/net/usb/hso.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index c4918b86ed1..9df04dd1332 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -1297,6 +1297,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) /* setup */ 
spin_lock_irq(&serial->serial_lock); tty->driver_data = serial; + tty_kref_put(serial->tty); serial->tty = tty_kref_get(tty); spin_unlock_irq(&serial->serial_lock); -- cgit From d45eb81c3e345fabaf27ef3ab437b85c0bf9fafa Mon Sep 17 00:00:00 2001 From: Denis Joseph Barrow Date: Thu, 15 Jan 2009 13:31:24 +0000 Subject: tty: Fix double grabbing of a spinlock The HSO changes for kref introduced a recursive spinlock take. All functions which call put_rxbuf_data already have serial->serial_lock grabbed. [Comment to code added-AC] Signed-off-by: Denis Joseph Barrow Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/net/usb/hso.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 9df04dd1332..e25a58f6ff6 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2044,9 +2044,8 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) return -2; } - spin_lock(&serial->serial_lock); + /* All callers to put_rxbuf_data hold serial_lock */ tty = tty_kref_get(serial->tty); - spin_unlock(&serial->serial_lock); /* Push data to tty */ if (tty) { -- cgit From 5839b414f9f9d2d6a471988763b61dbf85eb2dba Mon Sep 17 00:00:00 2001 From: Denis Joseph Barrow Date: Thu, 15 Jan 2009 13:31:34 +0000 Subject: hso serial throttled tty kref fix. This patch is for Alan Cox as it relates to the tty layer. Hopefully the hso driver is again relatively stable with this fix.
Signed-off-by: Denis Joseph Barrow Signed-off-by: Alan Cox Signed-off-by: Linus Torvalds --- drivers/net/usb/hso.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index e25a58f6ff6..6478bf63f28 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -2053,8 +2053,10 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) serial->curr_rx_urb_offset; D1("data to push to tty"); while (write_length_remaining) { - if (test_bit(TTY_THROTTLED, &tty->flags)) + if (test_bit(TTY_THROTTLED, &tty->flags)) { + tty_kref_put(tty); return -1; + } curr_write_len = tty_insert_flip_string (tty, urb->transfer_buffer + serial->curr_rx_urb_offset, -- cgit From cbbc49877d44408c4d0decf77c3c141732bbc679 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Jan 2009 19:12:51 +0000 Subject: CREDITS address update for dwmw2. Update employer's care-of address in CREDITS file, and remove references to some _very_ old stuff I'd forgotten I'd ever done. Signed-off-by: David Woodhouse Signed-off-by: Linus Torvalds --- CREDITS | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/CREDITS b/CREDITS index 939da46a87f..2b39168c06a 100644 --- a/CREDITS +++ b/CREDITS @@ -3786,14 +3786,11 @@ S: The Netherlands N: David Woodhouse E: dwmw2@infradead.org -D: ARCnet stuff, Applicom board driver, SO_BINDTODEVICE, -D: some Alpha platform porting from 2.0, Memory Technology Devices, -D: Acquire watchdog timer, PC speaker driver maintenance, +D: JFFS2 file system, Memory Technology Device subsystem, D: various other stuff that annoyed me by not working. -S: c/o Red Hat Engineering -S: Rustat House -S: 60 Clifton Road -S: Cambridge. CB1 7EG +S: c/o Intel Corporation +S: Pipers Way +S: Swindon. 
SN3 1RJ S: England N: Chris Wright -- cgit From bb9a35f293a3c8b5d57253cdfe2f29fa2627e1b9 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 15 Jan 2009 22:27:46 +0100 Subject: hwmon: (k8temp) Warn about fam F rev F errata Add warning about wrong CPU temperature readouts on all fam F rev F. The allowed combinations of processors ensure that all processors in a multisocket system have similar characteristics, e.g. (1) provide temperature sensor interface (>=RevC && =RevF) Thus it is sufficient to check the revision of the boot CPU. For "mixed silicon support" refer to "Revision Guide for AMD Athlon 64 and AMD Opteron Processors" (RevA-E) and "Revision Guide for AMD NPT Family 0Fh Processors" (RevF-G). Cc: Rudolf Marek Signed-off-by: Andreas Herrmann Signed-off-by: Jean Delvare --- drivers/hwmon/k8temp.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index bd2bde0ef95..ca56f2e26fd 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -31,6 +31,7 @@ #include #include #include +#include #define TEMP_FROM_REG(val) (((((val) >> 16) & 0xff) - 49) * 1000) #define REG_TEMP 0xe4 @@ -141,20 +142,34 @@ static int __devinit k8temp_probe(struct pci_dev *pdev, int err; u8 scfg; u32 temp; + u8 model, stepping; struct k8temp_data *data; - u32 cpuid = cpuid_eax(1); - - /* this feature should be available since SH-C0 core */ - if ((cpuid == 0xf40) || (cpuid == 0xf50) || (cpuid == 0xf51)) { - err = -ENODEV; - goto exit; - } if (!(data = kzalloc(sizeof(struct k8temp_data), GFP_KERNEL))) { err = -ENOMEM; goto exit; } + model = boot_cpu_data.x86_model; + stepping = boot_cpu_data.x86_mask; + + switch (boot_cpu_data.x86) { + case 0xf: + /* feature available since SH-C0, exclude older revisions */ + if (((model == 4) && (stepping == 0)) || + ((model == 5) && (stepping <= 1))) { + err = -ENODEV; + goto exit_free; + } + + if (model >= 0x40) { + dev_warn(&pdev->dev,
"Temperature readouts might be " + "wrong - check erratum #141\n"); + } + + break; + } + pci_read_config_byte(pdev, REG_TEMP, &scfg); scfg &= ~(SEL_PLACE | SEL_CORE); /* Select sensor 0, core0 */ pci_write_config_byte(pdev, REG_TEMP, scfg); -- cgit From a2e066bba2aad6583e3ff648bf28339d6c9f0898 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 15 Jan 2009 22:27:47 +0100 Subject: hwmon: (k8temp) Fix wrong sensor selection for AMD K8 RevF/RevG CPUs Meaning of ThermSenseCoreSel bit was inverted beginning with K8 RevF. That means with current driver temp1/temp2 belong to core 1 and temp3/temp4 belong to core 0 on a K8 RevF/RevG CPU. This patch ensures that temp1/temp2 always belong to core 0 and temp3/temp4 to core 1 for all K8 revisions. Cc: Rudolf Marek Signed-off-by: Andreas Herrmann Signed-off-by: Jean Delvare --- drivers/hwmon/k8temp.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index ca56f2e26fd..a6381bc9189 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -48,6 +48,7 @@ struct k8temp_data { /* registers values */ u8 sensorsp; /* sensor presence bits - SEL_CORE & SEL_PLACE */ u32 temp[2][2]; /* core, place */ + u8 swap_core_select; /* meaning of SEL_CORE is inverted */ }; static struct k8temp_data *k8temp_update_device(struct device *dev) @@ -117,6 +118,9 @@ static ssize_t show_temp(struct device *dev, int place = attr->index; struct k8temp_data *data = k8temp_update_device(dev); + if (data->swap_core_select) + core = core ? 0 : 1; + return sprintf(buf, "%d\n", TEMP_FROM_REG(data->temp[core][place])); } @@ -162,7 +166,12 @@ static int __devinit k8temp_probe(struct pci_dev *pdev, goto exit_free; } + /* + * AMD NPT family 0fh, i.e. 
RevF and RevG: + * meaning of SEL_CORE bit is inverted + */ if (model >= 0x40) { + data->swap_core_select = 1; dev_warn(&pdev->dev, "Temperature readouts might be " "wrong - check erratum #141\n"); } -- cgit From 76ff08da34196cfa308fcd3552bb9ea20888e745 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 15 Jan 2009 22:27:47 +0100 Subject: hwmon: (k8temp) Fix temperature reporting for (most) K8 RevG CPUs Current Temperature for K8 RevG desktop CPUs is a "normalized value" which can be below ambient temperature. As a consequence lots of RevG systems report temperatures like: $ sensors k8temp-pci-00c3 Adapter: PCI adapter Core0 Temp: +17 C Core0 Temp: +3 C Core1 Temp: +21 C Core1 Temp: +5 C being quite below ambient temperature. There are even reports of negative temperature values. This patch corrects the temperature reporting of k8temp for RevG desktop CPUs. Cc: Rudolf Marek Signed-off-by: Andreas Herrmann Signed-off-by: Jean Delvare --- drivers/hwmon/k8temp.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c index a6381bc9189..1fe99511184 100644 --- a/drivers/hwmon/k8temp.c +++ b/drivers/hwmon/k8temp.c @@ -49,6 +49,7 @@ struct k8temp_data { u8 sensorsp; /* sensor presence bits - SEL_CORE & SEL_PLACE */ u32 temp[2][2]; /* core, place */ u8 swap_core_select; /* meaning of SEL_CORE is inverted */ + u32 temp_offset; }; static struct k8temp_data *k8temp_update_device(struct device *dev) @@ -116,13 +117,15 @@ static ssize_t show_temp(struct device *dev, to_sensor_dev_attr_2(devattr); int core = attr->nr; int place = attr->index; + int temp; struct k8temp_data *data = k8temp_update_device(dev); if (data->swap_core_select) core = core ? 
0 : 1; - return sprintf(buf, "%d\n", - TEMP_FROM_REG(data->temp[core][place])); + temp = TEMP_FROM_REG(data->temp[core][place]) + data->temp_offset; + + return sprintf(buf, "%d\n", temp); } /* core, place */ @@ -176,6 +179,16 @@ static int __devinit k8temp_probe(struct pci_dev *pdev, "wrong - check erratum #141\n"); } + if ((model >= 0x69) && + !(model == 0xc1 || model == 0x6c || model == 0x7c)) { + /* + * RevG desktop CPUs (i.e. no socket S1G1 parts) + * need additional offset, otherwise reported + * temperature is below ambient temperature + */ + data->temp_offset = 21000; + } + break; } -- cgit From 1c301fc5394f7e1aa4c201e6e03d55d9c08b3bdf Mon Sep 17 00:00:00 2001 From: Jordan Crouse Date: Thu, 15 Jan 2009 22:27:47 +0100 Subject: hwmon: Add a driver for the ADT7475 hardware monitoring chip Hwmon driver for the ADT7475 chip. Signed-off-by: Jordan Crouse Signed-off-by: Hans de Goede Signed-off-by: Jean Delvare --- Documentation/hwmon/adt7475 | 87 +++ drivers/hwmon/Kconfig | 10 + drivers/hwmon/Makefile | 2 + drivers/hwmon/adt7475.c | 1221 +++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 1320 insertions(+) create mode 100644 Documentation/hwmon/adt7475 create mode 100644 drivers/hwmon/adt7475.c diff --git a/Documentation/hwmon/adt7475 b/Documentation/hwmon/adt7475 new file mode 100644 index 00000000000..a2b1abec850 --- /dev/null +++ b/Documentation/hwmon/adt7475 @@ -0,0 +1,87 @@ +This describes the interface for the ADT7475 driver: + +(there are 4 fans, numbered fan1 to fan4): + +fanX_input Read the current speed of the fan (in RPMs) +fanX_min Read/write the minimum speed of the fan. Dropping + below this sets an alarm. + +(there are three PWMs, numbered pwm1 to pwm3): + +pwmX Read/write the current duty cycle of the PWM. Writes + only have effect when auto mode is turned off (see + below). Range is 0 - 255. 
+ +pwmX_enable Fan speed control method: + + 0 - No control (fan at full speed) + 1 - Manual fan speed control (using pwm[1-*]) + 2 - Automatic fan speed control + +pwmX_auto_channels_temp Select which channels affect this PWM + + 1 - TEMP1 controls PWM + 2 - TEMP2 controls PWM + 4 - TEMP3 controls PWM + 6 - TEMP2 and TEMP3 control PWM + 7 - All three inputs control PWM + +pwmX_freq Read/write the PWM frequency in Hz. The number + should be one of the following: + + 11 Hz + 14 Hz + 22 Hz + 29 Hz + 35 Hz + 44 Hz + 58 Hz + 88 Hz + +pwmX_auto_point1_pwm Read/write the minimum PWM duty cycle in automatic mode + +pwmX_auto_point2_pwm Read/write the maximum PWM duty cycle in automatic mode + +(there are three temperature settings numbered temp1 to temp3): + +tempX_input Read the current temperature. The value is in milli + degrees of Celsius. + +tempX_max Read/write the upper temperature limit - exceeding this + will cause an alarm. + +tempX_min Read/write the lower temperature limit - exceeding this + will cause an alarm. + +tempX_offset Read/write the temperature adjustment offset + +tempX_crit Read/write the THERM limit for remote1. + +tempX_crit_hyst Set the temperature value below crit where the + fans will stay on - this helps drive the temperature + low enough so it doesn't stay near the edge and + cause THERM to keep tripping. + +tempX_auto_point1_temp Read/write the minimum temperature where the fans will + turn on in automatic mode. + +tempX_auto_point2_temp Read/write the maximum temperature over which the fans + will run in automatic mode. tempX_auto_point1_temp + and tempX_auto_point2_temp together define the + range of automatic control. + +tempX_alarm Read a 1 if the max/min alarm is set +tempX_fault Read a 1 if either temp1 or temp3 diode has a fault + +(There are two voltage settings, in1 and in2): + +inX_input Read the current voltage on VCC. Value is in + millivolts. + +inX_min read/write the minimum voltage limit. 
+ Dropping below this causes an alarm. + +inX_max read/write the maximum voltage limit. + Exceeding this causes an alarm. + +inX_alarm Read a 1 if the max/min alarm is set. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 4b33bc82cc2..5c349a19a3a 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -189,6 +189,16 @@ config SENSORS_ADT7473 This driver can also be built as a module. If so, the module will be called adt7473. +config SENSORS_ADT7475 + tristate "Analog Devices ADT7475" + depends on I2C && EXPERIMENTAL + help + If you say yes here you get support for the Analog Devices + ADT7475 hardware monitoring chips. + + This driver can also be built as a module. If so, the module + will be called adt7475. + config SENSORS_K8TEMP tristate "AMD Athlon64/FX or Opteron temperature sensor" depends on X86 && PCI && EXPERIMENTAL diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 19cb1ace3eb..2e80f37f39e 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -28,6 +28,8 @@ obj-$(CONFIG_SENSORS_ADS7828) += ads7828.o obj-$(CONFIG_SENSORS_ADT7462) += adt7462.o obj-$(CONFIG_SENSORS_ADT7470) += adt7470.o obj-$(CONFIG_SENSORS_ADT7473) += adt7473.o +obj-$(CONFIG_SENSORS_ADT7475) += adt7475.o + obj-$(CONFIG_SENSORS_APPLESMC) += applesmc.o obj-$(CONFIG_SENSORS_AMS) += ams/ obj-$(CONFIG_SENSORS_ATXP1) += atxp1.o diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c new file mode 100644 index 00000000000..d39877a7da6 --- /dev/null +++ b/drivers/hwmon/adt7475.c @@ -0,0 +1,1221 @@ +/* + * adt7475 - Thermal sensor driver for the ADT7475 chip and derivatives + * Copyright (C) 2007-2008, Advanced Micro Devices, Inc.
+ * Copyright (C) 2008 Jordan Crouse + * Copyright (C) 2008 Hans de Goede + + * Derived from the lm83 driver by Jean Delvare + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +/* Indexes for the sysfs hooks */ + +#define INPUT 0 +#define MIN 1 +#define MAX 2 +#define CONTROL 3 +#define OFFSET 3 +#define AUTOMIN 4 +#define THERM 5 +#define HYSTERSIS 6 + +/* These are unique identifiers for the sysfs functions - unlike the + numbers above, these are not also indexes into an array +*/ + +#define ALARM 9 +#define FAULT 10 + +/* 7475 Common Registers */ + +#define REG_VOLTAGE_BASE 0x21 +#define REG_TEMP_BASE 0x25 +#define REG_TACH_BASE 0x28 +#define REG_PWM_BASE 0x30 +#define REG_PWM_MAX_BASE 0x38 + +#define REG_DEVID 0x3D +#define REG_VENDID 0x3E + +#define REG_STATUS1 0x41 +#define REG_STATUS2 0x42 + +#define REG_VOLTAGE_MIN_BASE 0x46 +#define REG_VOLTAGE_MAX_BASE 0x47 + +#define REG_TEMP_MIN_BASE 0x4E +#define REG_TEMP_MAX_BASE 0x4F + +#define REG_TACH_MIN_BASE 0x54 + +#define REG_PWM_CONFIG_BASE 0x5C + +#define REG_TEMP_TRANGE_BASE 0x5F + +#define REG_PWM_MIN_BASE 0x64 + +#define REG_TEMP_TMIN_BASE 0x67 +#define REG_TEMP_THERM_BASE 0x6A + +#define REG_REMOTE1_HYSTERSIS 0x6D +#define REG_REMOTE2_HYSTERSIS 0x6E + +#define REG_TEMP_OFFSET_BASE 0x70 + +#define REG_EXTEND1 0x76 +#define REG_EXTEND2 0x77 +#define REG_CONFIG5 0x7C + +#define CONFIG5_TWOSCOMP 0x01 +#define CONFIG5_TEMPOFFSET 0x02 + +/* ADT7475 Settings */ + +#define ADT7475_VOLTAGE_COUNT 2 +#define ADT7475_TEMP_COUNT 3 +#define ADT7475_TACH_COUNT 4 +#define ADT7475_PWM_COUNT 3 + +/* Macro to read the registers */ + +#define adt7475_read(reg) i2c_smbus_read_byte_data(client, (reg)) + +/* Macros to easily index the registers */ + +#define TACH_REG(idx) (REG_TACH_BASE + ((idx) * 2)) +#define 
TACH_MIN_REG(idx) (REG_TACH_MIN_BASE + ((idx) * 2)) + +#define PWM_REG(idx) (REG_PWM_BASE + (idx)) +#define PWM_MAX_REG(idx) (REG_PWM_MAX_BASE + (idx)) +#define PWM_MIN_REG(idx) (REG_PWM_MIN_BASE + (idx)) +#define PWM_CONFIG_REG(idx) (REG_PWM_CONFIG_BASE + (idx)) + +#define VOLTAGE_REG(idx) (REG_VOLTAGE_BASE + (idx)) +#define VOLTAGE_MIN_REG(idx) (REG_VOLTAGE_MIN_BASE + ((idx) * 2)) +#define VOLTAGE_MAX_REG(idx) (REG_VOLTAGE_MAX_BASE + ((idx) * 2)) + +#define TEMP_REG(idx) (REG_TEMP_BASE + (idx)) +#define TEMP_MIN_REG(idx) (REG_TEMP_MIN_BASE + ((idx) * 2)) +#define TEMP_MAX_REG(idx) (REG_TEMP_MAX_BASE + ((idx) * 2)) +#define TEMP_TMIN_REG(idx) (REG_TEMP_TMIN_BASE + (idx)) +#define TEMP_THERM_REG(idx) (REG_TEMP_THERM_BASE + (idx)) +#define TEMP_OFFSET_REG(idx) (REG_TEMP_OFFSET_BASE + (idx)) +#define TEMP_TRANGE_REG(idx) (REG_TEMP_TRANGE_BASE + (idx)) + +static unsigned short normal_i2c[] = { 0x2e, I2C_CLIENT_END }; + +I2C_CLIENT_INSMOD_1(adt7475); + +static const struct i2c_device_id adt7475_id[] = { + { "adt7475", adt7475 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, adt7475_id); + +struct adt7475_data { + struct device *hwmon_dev; + struct mutex lock; + + unsigned long measure_updated; + unsigned long limits_updated; + char valid; + + u8 config5; + u16 alarms; + u16 voltage[3][3]; + u16 temp[7][3]; + u16 tach[2][4]; + u8 pwm[4][3]; + u8 range[3]; + u8 pwmctl[3]; + u8 pwmchan[3]; +}; + +static struct i2c_driver adt7475_driver; +static struct adt7475_data *adt7475_update_device(struct device *dev); +static void adt7475_read_hystersis(struct i2c_client *client); +static void adt7475_read_pwm(struct i2c_client *client, int index); + +/* Given a temp value, convert it to register value */ + +static inline u16 temp2reg(struct adt7475_data *data, long val) +{ + u16 ret; + + if (!(data->config5 & CONFIG5_TWOSCOMP)) { + val = SENSORS_LIMIT(val, -64000, 191000); + ret = (val + 64500) / 1000; + } else { + val = SENSORS_LIMIT(val, -128000, 127000); + if (val < -500) + ret = (256500 
+ val) / 1000; + else + ret = (val + 500) / 1000; + } + + return ret << 2; +} + +/* Given a register value, convert it to a real temp value */ + +static inline int reg2temp(struct adt7475_data *data, u16 reg) +{ + if (data->config5 & CONFIG5_TWOSCOMP) { + if (reg >= 512) + return (reg - 1024) * 250; + else + return reg * 250; + } else + return (reg - 256) * 250; +} + +static inline int tach2rpm(u16 tach) +{ + if (tach == 0 || tach == 0xFFFF) + return 0; + + return (90000 * 60) / tach; +} + +static inline u16 rpm2tach(unsigned long rpm) +{ + if (rpm == 0) + return 0; + + return SENSORS_LIMIT((90000 * 60) / rpm, 1, 0xFFFF); +} + +static inline int reg2vcc(u16 reg) +{ + return (4296 * reg) / 1000; +} + +static inline int reg2vccp(u16 reg) +{ + return (2929 * reg) / 1000; +} + +static inline u16 vcc2reg(long vcc) +{ + vcc = SENSORS_LIMIT(vcc, 0, 4396); + return (vcc * 1000) / 4296; +} + +static inline u16 vccp2reg(long vcc) +{ + vcc = SENSORS_LIMIT(vcc, 0, 2998); + return (vcc * 1000) / 2929; +} + +static u16 adt7475_read_word(struct i2c_client *client, int reg) +{ + u16 val; + + val = i2c_smbus_read_byte_data(client, reg); + val |= (i2c_smbus_read_byte_data(client, reg + 1) << 8); + + return val; +} + +static void adt7475_write_word(struct i2c_client *client, int reg, u16 val) +{ + i2c_smbus_write_byte_data(client, reg + 1, val >> 8); + i2c_smbus_write_byte_data(client, reg, val & 0xFF); +} + +/* Find the nearest value in a table - used for pwm frequency and + auto temp range */ +static int find_nearest(long val, const int *array, int size) +{ + int i; + + if (val < array[0]) + return 0; + + if (val > array[size - 1]) + return size - 1; + + for (i = 0; i < size - 1; i++) { + int a, b; + + if (val > array[i + 1]) + continue; + + a = val - array[i]; + b = array[i + 1] - val; + + return (a <= b) ? 
i : i + 1; + } + + return 0; +} + +static ssize_t show_voltage(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + unsigned short val; + + switch (sattr->nr) { + case ALARM: + return sprintf(buf, "%d\n", + (data->alarms >> (sattr->index + 1)) & 1); + default: + val = data->voltage[sattr->nr][sattr->index]; + return sprintf(buf, "%d\n", + sattr->index == + 0 ? reg2vccp(val) : reg2vcc(val)); + } +} + +static ssize_t set_voltage(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + unsigned char reg; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + + data->voltage[sattr->nr][sattr->index] = + sattr->index ? 
vcc2reg(val) : vccp2reg(val); + + if (sattr->nr == MIN) + reg = VOLTAGE_MIN_REG(sattr->index); + else + reg = VOLTAGE_MAX_REG(sattr->index); + + i2c_smbus_write_byte_data(client, reg, + data->voltage[sattr->nr][sattr->index] >> 2); + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t show_temp(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int out; + + switch (sattr->nr) { + case HYSTERSIS: + mutex_lock(&data->lock); + out = data->temp[sattr->nr][sattr->index]; + if (sattr->index != 1) + out = (out >> 4) & 0xF; + else + out = (out & 0xF); + /* Show the value as an absolute number tied to + * THERM */ + out = reg2temp(data, data->temp[THERM][sattr->index]) - + out * 1000; + mutex_unlock(&data->lock); + break; + + case OFFSET: + /* Offset is always 2's complement, regardless of the + * setting in CONFIG5 */ + mutex_lock(&data->lock); + out = (s8)data->temp[sattr->nr][sattr->index]; + if (data->config5 & CONFIG5_TEMPOFFSET) + out *= 1000; + else + out *= 500; + mutex_unlock(&data->lock); + break; + + case ALARM: + out = (data->alarms >> (sattr->index + 4)) & 1; + break; + + case FAULT: + /* Note - only for remote1 and remote2 */ + out = data->alarms & (sattr->index ? 0x8000 : 0x4000); + out = out ? 
0 : 1; + break; + + default: + /* All other temp values are in the configured format */ + out = reg2temp(data, data->temp[sattr->nr][sattr->index]); + } + + return sprintf(buf, "%d\n", out); +} + +static ssize_t set_temp(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + unsigned char reg = 0; + u8 out; + int temp; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + + /* We need the config register in all cases for temp <-> reg conv. */ + data->config5 = adt7475_read(REG_CONFIG5); + + switch (sattr->nr) { + case OFFSET: + if (data->config5 & CONFIG5_TEMPOFFSET) { + val = SENSORS_LIMIT(val, -63000, 127000); + out = data->temp[OFFSET][sattr->index] = val / 1000; + } else { + val = SENSORS_LIMIT(val, -63000, 64000); + out = data->temp[OFFSET][sattr->index] = val / 500; + } + break; + + case HYSTERSIS: + /* The value will be given as an absolute value, turn it + into an offset based on THERM */ + + /* Read fresh THERM and HYSTERSIS values from the chip */ + data->temp[THERM][sattr->index] = + adt7475_read(TEMP_THERM_REG(sattr->index)) << 2; + adt7475_read_hystersis(client); + + temp = reg2temp(data, data->temp[THERM][sattr->index]); + val = SENSORS_LIMIT(val, temp - 15000, temp); + val = (temp - val) / 1000; + + if (sattr->index != 1) { + data->temp[HYSTERSIS][sattr->index] &= 0xF0; + data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4; + } else { + data->temp[HYSTERSIS][sattr->index] &= 0x0F; + data->temp[HYSTERSIS][sattr->index] |= (val & 0xF); + } + + out = data->temp[HYSTERSIS][sattr->index]; + break; + + default: + data->temp[sattr->nr][sattr->index] = temp2reg(data, val); + + /* We maintain an extra 2 digits of precision for simplicity + * - shift those back off before writing the value */ + out = 
(u8) (data->temp[sattr->nr][sattr->index] >> 2); + } + + switch (sattr->nr) { + case MIN: + reg = TEMP_MIN_REG(sattr->index); + break; + case MAX: + reg = TEMP_MAX_REG(sattr->index); + break; + case OFFSET: + reg = TEMP_OFFSET_REG(sattr->index); + break; + case AUTOMIN: + reg = TEMP_TMIN_REG(sattr->index); + break; + case THERM: + reg = TEMP_THERM_REG(sattr->index); + break; + case HYSTERSIS: + if (sattr->index != 2) + reg = REG_REMOTE1_HYSTERSIS; + else + reg = REG_REMOTE2_HYSTERSIS; + + break; + } + + i2c_smbus_write_byte_data(client, reg, out); + + mutex_unlock(&data->lock); + return count; +} + +/* Table of autorange values - the user will write the value in millidegrees, + and we'll convert it */ +static const int autorange_table[] = { + 2000, 2500, 3330, 4000, 5000, 6670, 8000, + 10000, 13330, 16000, 20000, 26670, 32000, 40000, + 53330, 80000 +}; + +static ssize_t show_point2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int out, val; + + mutex_lock(&data->lock); + out = (data->range[sattr->index] >> 4) & 0x0F; + val = reg2temp(data, data->temp[AUTOMIN][sattr->index]); + mutex_unlock(&data->lock); + + return sprintf(buf, "%d\n", val + autorange_table[out]); +} + +static ssize_t set_point2(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int temp; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + + /* Get a fresh copy of the needed registers */ + data->config5 = adt7475_read(REG_CONFIG5); + data->temp[AUTOMIN][sattr->index] = + adt7475_read(TEMP_TMIN_REG(sattr->index)) << 2; + data->range[sattr->index] = + 
adt7475_read(TEMP_TRANGE_REG(sattr->index)); + + /* The user will write an absolute value, so subtract the start point + to figure the range */ + temp = reg2temp(data, data->temp[AUTOMIN][sattr->index]); + val = SENSORS_LIMIT(val, temp + autorange_table[0], + temp + autorange_table[ARRAY_SIZE(autorange_table) - 1]); + val -= temp; + + /* Find the nearest table entry to what the user wrote */ + val = find_nearest(val, autorange_table, ARRAY_SIZE(autorange_table)); + + data->range[sattr->index] &= ~0xF0; + data->range[sattr->index] |= val << 4; + + i2c_smbus_write_byte_data(client, TEMP_TRANGE_REG(sattr->index), + data->range[sattr->index]); + + mutex_unlock(&data->lock); + return count; +} + +static ssize_t show_tach(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + int out; + + if (sattr->nr == ALARM) + out = (data->alarms >> (sattr->index + 10)) & 1; + else + out = tach2rpm(data->tach[sattr->nr][sattr->index]); + + return sprintf(buf, "%d\n", out); +} + +static ssize_t set_tach(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + unsigned long val; + + if (strict_strtoul(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + + data->tach[MIN][sattr->index] = rpm2tach(val); + + adt7475_write_word(client, TACH_MIN_REG(sattr->index), + data->tach[MIN][sattr->index]); + + mutex_unlock(&data->lock); + return count; +} + +static ssize_t show_pwm(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + + return sprintf(buf, "%d\n", 
data->pwm[sattr->nr][sattr->index]); +} + +static ssize_t show_pwmchan(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + + return sprintf(buf, "%d\n", data->pwmchan[sattr->index]); +} + +static ssize_t show_pwmctrl(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + + return sprintf(buf, "%d\n", data->pwmctl[sattr->index]); +} + +static ssize_t set_pwm(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + unsigned char reg = 0; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + + switch (sattr->nr) { + case INPUT: + /* Get a fresh value for CONTROL */ + data->pwm[CONTROL][sattr->index] = + adt7475_read(PWM_CONFIG_REG(sattr->index)); + + /* If we are not in manual mode, then we shouldn't allow + * the user to set the pwm speed */ + if (((data->pwm[CONTROL][sattr->index] >> 5) & 7) != 7) { + mutex_unlock(&data->lock); + return count; + } + + reg = PWM_REG(sattr->index); + break; + + case MIN: + reg = PWM_MIN_REG(sattr->index); + break; + + case MAX: + reg = PWM_MAX_REG(sattr->index); + break; + } + + data->pwm[sattr->nr][sattr->index] = SENSORS_LIMIT(val, 0, 0xFF); + i2c_smbus_write_byte_data(client, reg, + data->pwm[sattr->nr][sattr->index]); + + mutex_unlock(&data->lock); + + return count; +} + +/* Called by set_pwmctrl and set_pwmchan */ + +static int hw_set_pwm(struct i2c_client *client, int index, + unsigned int pwmctl, unsigned int pwmchan) +{ + struct adt7475_data *data = i2c_get_clientdata(client); + long val 
= 0; + + switch (pwmctl) { + case 0: + val = 0x03; /* Run at full speed */ + break; + case 1: + val = 0x07; /* Manual mode */ + break; + case 2: + switch (pwmchan) { + case 1: + /* Remote1 controls PWM */ + val = 0x00; + break; + case 2: + /* local controls PWM */ + val = 0x01; + break; + case 4: + /* remote2 controls PWM */ + val = 0x02; + break; + case 6: + /* local/remote2 control PWM */ + val = 0x05; + break; + case 7: + /* All three control PWM */ + val = 0x06; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + data->pwmctl[index] = pwmctl; + data->pwmchan[index] = pwmchan; + + data->pwm[CONTROL][index] &= ~0xE0; + data->pwm[CONTROL][index] |= (val & 7) << 5; + + i2c_smbus_write_byte_data(client, PWM_CONFIG_REG(index), + data->pwm[CONTROL][index]); + + return 0; +} + +static ssize_t set_pwmchan(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + int r; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + /* Read Modify Write PWM values */ + adt7475_read_pwm(client, sattr->index); + r = hw_set_pwm(client, sattr->index, data->pwmctl[sattr->index], val); + if (r) + count = r; + mutex_unlock(&data->lock); + + return count; +} + +static ssize_t set_pwmctrl(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + int r; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + mutex_lock(&data->lock); + /* Read Modify Write PWM values */ + adt7475_read_pwm(client, sattr->index); + r = hw_set_pwm(client, sattr->index, val, 
data->pwmchan[sattr->index]); + if (r) + count = r; + mutex_unlock(&data->lock); + + return count; +} + +/* List of frequencies for the PWM */ +static const int pwmfreq_table[] = { + 11, 14, 22, 29, 35, 44, 58, 88 +}; + +static ssize_t show_pwmfreq(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct adt7475_data *data = adt7475_update_device(dev); + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + + return sprintf(buf, "%d\n", + pwmfreq_table[data->range[sattr->index] & 7]); +} + +static ssize_t set_pwmfreq(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sattr = to_sensor_dev_attr_2(attr); + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + int out; + long val; + + if (strict_strtol(buf, 10, &val)) + return -EINVAL; + + out = find_nearest(val, pwmfreq_table, ARRAY_SIZE(pwmfreq_table)); + + mutex_lock(&data->lock); + + data->range[sattr->index] = + adt7475_read(TEMP_TRANGE_REG(sattr->index)); + data->range[sattr->index] &= ~7; + data->range[sattr->index] |= out; + + i2c_smbus_write_byte_data(client, TEMP_TRANGE_REG(sattr->index), + data->range[sattr->index]); + + mutex_unlock(&data->lock); + return count; +} + +static SENSOR_DEVICE_ATTR_2(in1_input, S_IRUGO, show_voltage, NULL, INPUT, 0); +static SENSOR_DEVICE_ATTR_2(in1_max, S_IRUGO | S_IWUSR, show_voltage, + set_voltage, MAX, 0); +static SENSOR_DEVICE_ATTR_2(in1_min, S_IRUGO | S_IWUSR, show_voltage, + set_voltage, MIN, 0); +static SENSOR_DEVICE_ATTR_2(in1_alarm, S_IRUGO, show_voltage, NULL, ALARM, 0); +static SENSOR_DEVICE_ATTR_2(in2_input, S_IRUGO, show_voltage, NULL, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(in2_max, S_IRUGO | S_IWUSR, show_voltage, + set_voltage, MAX, 1); +static SENSOR_DEVICE_ATTR_2(in2_min, S_IRUGO | S_IWUSR, show_voltage, + set_voltage, MIN, 1); +static SENSOR_DEVICE_ATTR_2(in2_alarm, S_IRUGO, show_voltage, NULL, 
ALARM, 1); +static SENSOR_DEVICE_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, INPUT, 0); +static SENSOR_DEVICE_ATTR_2(temp1_alarm, S_IRUGO, show_temp, NULL, ALARM, 0); +static SENSOR_DEVICE_ATTR_2(temp1_fault, S_IRUGO, show_temp, NULL, FAULT, 0); +static SENSOR_DEVICE_ATTR_2(temp1_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + MAX, 0); +static SENSOR_DEVICE_ATTR_2(temp1_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + MIN, 0); +static SENSOR_DEVICE_ATTR_2(temp1_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, OFFSET, 0); +static SENSOR_DEVICE_ATTR_2(temp1_auto_point1_temp, S_IRUGO | S_IWUSR, + show_temp, set_temp, AUTOMIN, 0); +static SENSOR_DEVICE_ATTR_2(temp1_auto_point2_temp, S_IRUGO | S_IWUSR, + show_point2, set_point2, 0, 0); +static SENSOR_DEVICE_ATTR_2(temp1_crit, S_IRUGO | S_IWUSR, show_temp, set_temp, + THERM, 0); +static SENSOR_DEVICE_ATTR_2(temp1_crit_hyst, S_IRUGO | S_IWUSR, show_temp, + set_temp, HYSTERSIS, 0); +static SENSOR_DEVICE_ATTR_2(temp2_input, S_IRUGO, show_temp, NULL, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(temp2_alarm, S_IRUGO, show_temp, NULL, ALARM, 1); +static SENSOR_DEVICE_ATTR_2(temp2_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + MAX, 1); +static SENSOR_DEVICE_ATTR_2(temp2_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + MIN, 1); +static SENSOR_DEVICE_ATTR_2(temp2_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, OFFSET, 1); +static SENSOR_DEVICE_ATTR_2(temp2_auto_point1_temp, S_IRUGO | S_IWUSR, + show_temp, set_temp, AUTOMIN, 1); +static SENSOR_DEVICE_ATTR_2(temp2_auto_point2_temp, S_IRUGO | S_IWUSR, + show_point2, set_point2, 0, 1); +static SENSOR_DEVICE_ATTR_2(temp2_crit, S_IRUGO | S_IWUSR, show_temp, set_temp, + THERM, 1); +static SENSOR_DEVICE_ATTR_2(temp2_crit_hyst, S_IRUGO | S_IWUSR, show_temp, + set_temp, HYSTERSIS, 1); +static SENSOR_DEVICE_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, INPUT, 2); +static SENSOR_DEVICE_ATTR_2(temp3_alarm, S_IRUGO, show_temp, NULL, ALARM, 2); +static SENSOR_DEVICE_ATTR_2(temp3_fault, S_IRUGO, 
show_temp, NULL, FAULT, 2); +static SENSOR_DEVICE_ATTR_2(temp3_max, S_IRUGO | S_IWUSR, show_temp, set_temp, + MAX, 2); +static SENSOR_DEVICE_ATTR_2(temp3_min, S_IRUGO | S_IWUSR, show_temp, set_temp, + MIN, 2); +static SENSOR_DEVICE_ATTR_2(temp3_offset, S_IRUGO | S_IWUSR, show_temp, + set_temp, OFFSET, 2); +static SENSOR_DEVICE_ATTR_2(temp3_auto_point1_temp, S_IRUGO | S_IWUSR, + show_temp, set_temp, AUTOMIN, 2); +static SENSOR_DEVICE_ATTR_2(temp3_auto_point2_temp, S_IRUGO | S_IWUSR, + show_point2, set_point2, 0, 2); +static SENSOR_DEVICE_ATTR_2(temp3_crit, S_IRUGO | S_IWUSR, show_temp, set_temp, + THERM, 2); +static SENSOR_DEVICE_ATTR_2(temp3_crit_hyst, S_IRUGO | S_IWUSR, show_temp, + set_temp, HYSTERSIS, 2); +static SENSOR_DEVICE_ATTR_2(fan1_input, S_IRUGO, show_tach, NULL, INPUT, 0); +static SENSOR_DEVICE_ATTR_2(fan1_min, S_IRUGO | S_IWUSR, show_tach, set_tach, + MIN, 0); +static SENSOR_DEVICE_ATTR_2(fan1_alarm, S_IRUGO, show_tach, NULL, ALARM, 0); +static SENSOR_DEVICE_ATTR_2(fan2_input, S_IRUGO, show_tach, NULL, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(fan2_min, S_IRUGO | S_IWUSR, show_tach, set_tach, + MIN, 1); +static SENSOR_DEVICE_ATTR_2(fan2_alarm, S_IRUGO, show_tach, NULL, ALARM, 1); +static SENSOR_DEVICE_ATTR_2(fan3_input, S_IRUGO, show_tach, NULL, INPUT, 2); +static SENSOR_DEVICE_ATTR_2(fan3_min, S_IRUGO | S_IWUSR, show_tach, set_tach, + MIN, 2); +static SENSOR_DEVICE_ATTR_2(fan3_alarm, S_IRUGO, show_tach, NULL, ALARM, 2); +static SENSOR_DEVICE_ATTR_2(fan4_input, S_IRUGO, show_tach, NULL, INPUT, 3); +static SENSOR_DEVICE_ATTR_2(fan4_min, S_IRUGO | S_IWUSR, show_tach, set_tach, + MIN, 3); +static SENSOR_DEVICE_ATTR_2(fan4_alarm, S_IRUGO, show_tach, NULL, ALARM, 3); +static SENSOR_DEVICE_ATTR_2(pwm1, S_IRUGO | S_IWUSR, show_pwm, set_pwm, INPUT, + 0); +static SENSOR_DEVICE_ATTR_2(pwm1_freq, S_IRUGO | S_IWUSR, show_pwmfreq, + set_pwmfreq, INPUT, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_enable, S_IRUGO | S_IWUSR, show_pwmctrl, + set_pwmctrl, INPUT, 0); +static 
SENSOR_DEVICE_ATTR_2(pwm1_auto_channel_temp, S_IRUGO | S_IWUSR, + show_pwmchan, set_pwmchan, INPUT, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point1_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MIN, 0); +static SENSOR_DEVICE_ATTR_2(pwm1_auto_point2_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MAX, 0); +static SENSOR_DEVICE_ATTR_2(pwm2, S_IRUGO | S_IWUSR, show_pwm, set_pwm, INPUT, + 1); +static SENSOR_DEVICE_ATTR_2(pwm2_freq, S_IRUGO | S_IWUSR, show_pwmfreq, + set_pwmfreq, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_enable, S_IRUGO | S_IWUSR, show_pwmctrl, + set_pwmctrl, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_channel_temp, S_IRUGO | S_IWUSR, + show_pwmchan, set_pwmchan, INPUT, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point1_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MIN, 1); +static SENSOR_DEVICE_ATTR_2(pwm2_auto_point2_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MAX, 1); +static SENSOR_DEVICE_ATTR_2(pwm3, S_IRUGO | S_IWUSR, show_pwm, set_pwm, INPUT, + 2); +static SENSOR_DEVICE_ATTR_2(pwm3_freq, S_IRUGO | S_IWUSR, show_pwmfreq, + set_pwmfreq, INPUT, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_enable, S_IRUGO | S_IWUSR, show_pwmctrl, + set_pwmctrl, INPUT, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_channel_temp, S_IRUGO | S_IWUSR, + show_pwmchan, set_pwmchan, INPUT, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point1_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MIN, 2); +static SENSOR_DEVICE_ATTR_2(pwm3_auto_point2_pwm, S_IRUGO | S_IWUSR, show_pwm, + set_pwm, MAX, 2); + +static struct attribute *adt7475_attrs[] = { + &sensor_dev_attr_in1_input.dev_attr.attr, + &sensor_dev_attr_in1_max.dev_attr.attr, + &sensor_dev_attr_in1_min.dev_attr.attr, + &sensor_dev_attr_in1_alarm.dev_attr.attr, + &sensor_dev_attr_in2_input.dev_attr.attr, + &sensor_dev_attr_in2_max.dev_attr.attr, + &sensor_dev_attr_in2_min.dev_attr.attr, + &sensor_dev_attr_in2_alarm.dev_attr.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_alarm.dev_attr.attr, + 
&sensor_dev_attr_temp1_fault.dev_attr.attr, + &sensor_dev_attr_temp1_max.dev_attr.attr, + &sensor_dev_attr_temp1_min.dev_attr.attr, + &sensor_dev_attr_temp1_offset.dev_attr.attr, + &sensor_dev_attr_temp1_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_temp1_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_temp1_crit.dev_attr.attr, + &sensor_dev_attr_temp1_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp2_input.dev_attr.attr, + &sensor_dev_attr_temp2_alarm.dev_attr.attr, + &sensor_dev_attr_temp2_max.dev_attr.attr, + &sensor_dev_attr_temp2_min.dev_attr.attr, + &sensor_dev_attr_temp2_offset.dev_attr.attr, + &sensor_dev_attr_temp2_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_temp2_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_temp2_crit.dev_attr.attr, + &sensor_dev_attr_temp2_crit_hyst.dev_attr.attr, + &sensor_dev_attr_temp3_input.dev_attr.attr, + &sensor_dev_attr_temp3_fault.dev_attr.attr, + &sensor_dev_attr_temp3_alarm.dev_attr.attr, + &sensor_dev_attr_temp3_max.dev_attr.attr, + &sensor_dev_attr_temp3_min.dev_attr.attr, + &sensor_dev_attr_temp3_offset.dev_attr.attr, + &sensor_dev_attr_temp3_auto_point1_temp.dev_attr.attr, + &sensor_dev_attr_temp3_auto_point2_temp.dev_attr.attr, + &sensor_dev_attr_temp3_crit.dev_attr.attr, + &sensor_dev_attr_temp3_crit_hyst.dev_attr.attr, + &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan1_min.dev_attr.attr, + &sensor_dev_attr_fan1_alarm.dev_attr.attr, + &sensor_dev_attr_fan2_input.dev_attr.attr, + &sensor_dev_attr_fan2_min.dev_attr.attr, + &sensor_dev_attr_fan2_alarm.dev_attr.attr, + &sensor_dev_attr_fan3_input.dev_attr.attr, + &sensor_dev_attr_fan3_min.dev_attr.attr, + &sensor_dev_attr_fan3_alarm.dev_attr.attr, + &sensor_dev_attr_fan4_input.dev_attr.attr, + &sensor_dev_attr_fan4_min.dev_attr.attr, + &sensor_dev_attr_fan4_alarm.dev_attr.attr, + &sensor_dev_attr_pwm1.dev_attr.attr, + &sensor_dev_attr_pwm1_freq.dev_attr.attr, + &sensor_dev_attr_pwm1_enable.dev_attr.attr, + 
&sensor_dev_attr_pwm1_auto_channel_temp.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm1_auto_point2_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2.dev_attr.attr, + &sensor_dev_attr_pwm2_freq.dev_attr.attr, + &sensor_dev_attr_pwm2_enable.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_channel_temp.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm2_auto_point2_pwm.dev_attr.attr, + &sensor_dev_attr_pwm3.dev_attr.attr, + &sensor_dev_attr_pwm3_freq.dev_attr.attr, + &sensor_dev_attr_pwm3_enable.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_channel_temp.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point1_pwm.dev_attr.attr, + &sensor_dev_attr_pwm3_auto_point2_pwm.dev_attr.attr, + NULL, +}; + +struct attribute_group adt7475_attr_group = { .attrs = adt7475_attrs }; + +static int adt7475_detect(struct i2c_client *client, int kind, + struct i2c_board_info *info) +{ + struct i2c_adapter *adapter = client->adapter; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) + return -ENODEV; + + if (kind <= 0) { + if (adt7475_read(REG_VENDID) != 0x41 || + adt7475_read(REG_DEVID) != 0x75) { + dev_err(&adapter->dev, + "Couldn't detect a adt7475 part at 0x%02x\n", + (unsigned int)client->addr); + return -ENODEV; + } + } + + strlcpy(info->type, adt7475_id[0].name, I2C_NAME_SIZE); + + return 0; +} + +static int adt7475_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adt7475_data *data; + int i, ret = 0; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + mutex_init(&data->lock); + i2c_set_clientdata(client, data); + + /* Call adt7475_read_pwm for all pwm's as this will reprogram any + pwm's which are disabled to manual mode with 0% duty cycle */ + for (i = 0; i < ADT7475_PWM_COUNT; i++) + adt7475_read_pwm(client, i); + + ret = sysfs_create_group(&client->dev.kobj, &adt7475_attr_group); + if (ret) + goto efree; + + data->hwmon_dev = 
hwmon_device_register(&client->dev); + if (IS_ERR(data->hwmon_dev)) { + ret = PTR_ERR(data->hwmon_dev); + goto eremove; + } + + return 0; + +eremove: + sysfs_remove_group(&client->dev.kobj, &adt7475_attr_group); +efree: + kfree(data); + return ret; +} + +static int adt7475_remove(struct i2c_client *client) +{ + struct adt7475_data *data = i2c_get_clientdata(client); + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&client->dev.kobj, &adt7475_attr_group); + kfree(data); + + return 0; +} + +static struct i2c_driver adt7475_driver = { + .class = I2C_CLASS_HWMON, + .driver = { + .name = "adt7475", + }, + .probe = adt7475_probe, + .remove = adt7475_remove, + .id_table = adt7475_id, + .detect = adt7475_detect, + .address_data = &addr_data, +}; + +static void adt7475_read_hystersis(struct i2c_client *client) +{ + struct adt7475_data *data = i2c_get_clientdata(client); + + data->temp[HYSTERSIS][0] = (u16) adt7475_read(REG_REMOTE1_HYSTERSIS); + data->temp[HYSTERSIS][1] = data->temp[HYSTERSIS][0]; + data->temp[HYSTERSIS][2] = (u16) adt7475_read(REG_REMOTE2_HYSTERSIS); +} + +static void adt7475_read_pwm(struct i2c_client *client, int index) +{ + struct adt7475_data *data = i2c_get_clientdata(client); + unsigned int v; + + data->pwm[CONTROL][index] = adt7475_read(PWM_CONFIG_REG(index)); + + /* Figure out the internal value for pwmctrl and pwmchan + based on the current settings */ + v = (data->pwm[CONTROL][index] >> 5) & 7; + + if (v == 3) + data->pwmctl[index] = 0; + else if (v == 7) + data->pwmctl[index] = 1; + else if (v == 4) { + /* The fan is disabled - we don't want to + support that, so change to manual mode and + set the duty cycle to 0 instead + */ + data->pwm[INPUT][index] = 0; + data->pwm[CONTROL][index] &= ~0xE0; + data->pwm[CONTROL][index] |= (7 << 5); + + i2c_smbus_write_byte_data(client, PWM_CONFIG_REG(index), + data->pwm[INPUT][index]); + + i2c_smbus_write_byte_data(client, PWM_CONFIG_REG(index), + data->pwm[CONTROL][index]); + + 
data->pwmctl[index] = 1; + } else { + data->pwmctl[index] = 2; + + switch (v) { + case 0: + data->pwmchan[index] = 1; + break; + case 1: + data->pwmchan[index] = 2; + break; + case 2: + data->pwmchan[index] = 4; + break; + case 5: + data->pwmchan[index] = 6; + break; + case 6: + data->pwmchan[index] = 7; + break; + } + } +} + +static struct adt7475_data *adt7475_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct adt7475_data *data = i2c_get_clientdata(client); + u8 ext; + int i; + + mutex_lock(&data->lock); + + /* Measurement values update every 2 seconds */ + if (time_after(jiffies, data->measure_updated + HZ * 2) || + !data->valid) { + data->alarms = adt7475_read(REG_STATUS2) << 8; + data->alarms |= adt7475_read(REG_STATUS1); + + ext = adt7475_read(REG_EXTEND1); + for (i = 0; i < ADT7475_VOLTAGE_COUNT; i++) + data->voltage[INPUT][i] = + (adt7475_read(VOLTAGE_REG(i)) << 2) | + ((ext >> ((i + 1) * 2)) & 3); + + ext = adt7475_read(REG_EXTEND2); + for (i = 0; i < ADT7475_TEMP_COUNT; i++) + data->temp[INPUT][i] = + (adt7475_read(TEMP_REG(i)) << 2) | + ((ext >> ((i + 1) * 2)) & 3); + + for (i = 0; i < ADT7475_TACH_COUNT; i++) + data->tach[INPUT][i] = + adt7475_read_word(client, TACH_REG(i)); + + /* Updated by hw when in auto mode */ + for (i = 0; i < ADT7475_PWM_COUNT; i++) + data->pwm[INPUT][i] = adt7475_read(PWM_REG(i)); + + data->measure_updated = jiffies; + } + + /* Limits and settings, should never change update every 60 seconds */ + if (time_after(jiffies, data->limits_updated + HZ * 2) || + !data->valid) { + data->config5 = adt7475_read(REG_CONFIG5); + + for (i = 0; i < ADT7475_VOLTAGE_COUNT; i++) { + /* Adjust values so they match the input precision */ + data->voltage[MIN][i] = + adt7475_read(VOLTAGE_MIN_REG(i)) << 2; + data->voltage[MAX][i] = + adt7475_read(VOLTAGE_MAX_REG(i)) << 2; + } + + for (i = 0; i < ADT7475_TEMP_COUNT; i++) { + /* Adjust values so they match the input precision */ + data->temp[MIN][i] = + 
adt7475_read(TEMP_MIN_REG(i)) << 2; + data->temp[MAX][i] = + adt7475_read(TEMP_MAX_REG(i)) << 2; + data->temp[AUTOMIN][i] = + adt7475_read(TEMP_TMIN_REG(i)) << 2; + data->temp[THERM][i] = + adt7475_read(TEMP_THERM_REG(i)) << 2; + data->temp[OFFSET][i] = + adt7475_read(TEMP_OFFSET_REG(i)); + } + adt7475_read_hystersis(client); + + for (i = 0; i < ADT7475_TACH_COUNT; i++) + data->tach[MIN][i] = + adt7475_read_word(client, TACH_MIN_REG(i)); + + for (i = 0; i < ADT7475_PWM_COUNT; i++) { + data->pwm[MAX][i] = adt7475_read(PWM_MAX_REG(i)); + data->pwm[MIN][i] = adt7475_read(PWM_MIN_REG(i)); + /* Set the channel and control information */ + adt7475_read_pwm(client, i); + } + + data->range[0] = adt7475_read(TEMP_TRANGE_REG(0)); + data->range[1] = adt7475_read(TEMP_TRANGE_REG(1)); + data->range[2] = adt7475_read(TEMP_TRANGE_REG(2)); + + data->limits_updated = jiffies; + data->valid = 1; + } + + mutex_unlock(&data->lock); + + return data; +} + +static int __init sensors_adt7475_init(void) +{ + return i2c_add_driver(&adt7475_driver); +} + +static void __exit sensors_adt7475_exit(void) +{ + i2c_del_driver(&adt7475_driver); +} + +MODULE_AUTHOR("Advanced Micro Devices, Inc"); +MODULE_DESCRIPTION("adt7475 driver"); +MODULE_LICENSE("GPL"); + +module_init(sensors_adt7475_init); +module_exit(sensors_adt7475_exit); -- cgit From 058943ddcb7cb307a0c406088c1e61f203d8b66f Mon Sep 17 00:00:00 2001 From: Alistair John Strachan Date: Thu, 15 Jan 2009 22:27:47 +0100 Subject: hwmon: (abituguru3) Match partial DMI board name strings The switch-over to using DMI board strings to identify abituguru3 compatible mainboards works most of the time, but sometimes the vendor has substantially modified the board string between BIOS revisions. We have found that the vendor chipset identification string (provided in brackets) changes frequently and is of no use to us. The rest of the board string sometimes changes in subtle ways, e.g. whitespace or variations in capitalization. 
The new comparison code checks only a part of the supplied DMI board string, trimming the bracketed content, whitespace, and ignoring case as necessary. This fixes a bug where an IP35 Pro running an early BIOS would not be detected without the force=1 module parameter, and also speculatively fixes other similar issues. Signed-off-by: Alistair John Strachan Reported-by: Nick Pasich Cc: Hans de Goede Signed-off-by: Jean Delvare --- drivers/hwmon/abituguru3.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index 70bb854086d..4914b34e6bc 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -279,7 +279,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "OTES1 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0011, "AT8 32X(ATI RD580-ULI M1575)", { + { 0x0011, "AT8 32X", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR", 1, 0, 20, 1, 0 }, { "DDR VTT", 2, 0, 10, 1, 0 }, @@ -402,7 +402,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX3 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0016, "AW9D-MAX (Intel i975-ICH7)", { + { 0x0016, "AW9D-MAX", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR2", 1, 0, 20, 1, 0 }, { "DDR2 VTT", 2, 0, 10, 1, 0 }, @@ -509,7 +509,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX3 FAN", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x001A, "IP35 Pro(Intel P35-ICH9R)", { + { 0x001A, "IP35 Pro", { { "CPU Core", 0, 0, 10, 1, 0 }, { "DDR2", 1, 0, 20, 1, 0 }, { "DDR2 VTT", 2, 0, 10, 1, 0 }, @@ -1128,6 +1128,7 @@ static int __init abituguru3_dmi_detect(void) { const char *board_vendor, *board_name; int i, err = (force) ?
1 : -ENODEV; + size_t sublen; board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR); if (!board_vendor || strcmp(board_vendor, "http://www.abit.com.tw/")) @@ -1137,9 +1138,20 @@ static int __init abituguru3_dmi_detect(void) if (!board_name) return err; + /* At the moment, we don't care about the part of the vendor + * DMI string contained in brackets. Truncate the string at + * the first occurrence of a bracket. Trim any trailing space + * from the substring. + */ + sublen = strcspn(board_name, "("); + while (sublen > 0 && board_name[sublen - 1] == ' ') + sublen--; + for (i = 0; abituguru3_motherboards[i].id; i++) { const char *dmi_name = abituguru3_motherboards[i].dmi_name; - if (dmi_name && !strcmp(dmi_name, board_name)) + if (!dmi_name || strlen(dmi_name) != sublen) + continue; + if (!strncasecmp(board_name, dmi_name, sublen)) break; } -- cgit From 3907a8def78a15cd91985c23a3e76b563f36929a Mon Sep 17 00:00:00 2001 From: Alistair John Strachan Date: Thu, 15 Jan 2009 22:27:48 +0100 Subject: hwmon: (abituguru3) Enable DMI probing feature on IN9 32X MAX Switch the IN9 32X MAX over from port probing to the preferred DMI probe method. 
Signed-off-by: Alistair John Strachan Tested-by: Paul Hartman Cc: Hans de Goede Signed-off-by: Jean Delvare --- drivers/hwmon/abituguru3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index 4914b34e6bc..f948ed16e96 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -482,7 +482,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = { { "AUX3 Fan", 36, 2, 60, 1, 0 }, { NULL, 0, 0, 0, 0, 0 } } }, - { 0x0019, NULL /* Unknown, need DMI string */, { + { 0x0019, "IN9 32X MAX", { { "CPU Core", 7, 0, 10, 1, 0 }, { "DDR2", 13, 0, 20, 1, 0 }, { "DDR2 VTT", 14, 0, 10, 1, 0 }, -- cgit From 46a5f173fc88ffc22651162033696d8a9fbcdc5c Mon Sep 17 00:00:00 2001 From: Alistair John Strachan Date: Thu, 15 Jan 2009 22:27:48 +0100 Subject: hwmon: (abituguru3) Fix CONFIG_DMI=n fallback to probe When CONFIG_DMI is not enabled, dmi detection should flag that no board could be detected (err=1) rather than another error condition (err<0). This fixes the fallback to manual probing for all motherboards, even those without DMI strings, when CONFIG_DMI=n. Signed-off-by: Alistair John Strachan Cc: Hans de Goede Signed-off-by: Jean Delvare --- drivers/hwmon/abituguru3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c index f948ed16e96..e52b38806d0 100644 --- a/drivers/hwmon/abituguru3.c +++ b/drivers/hwmon/abituguru3.c @@ -1165,7 +1165,7 @@ static int __init abituguru3_dmi_detect(void) static inline int abituguru3_dmi_detect(void) { - return -ENODEV; + return 1; } #endif /* CONFIG_DMI */ -- cgit From a58c891a53aca81c78f9cbe0572a301042470e96 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 15 Jan 2009 15:29:35 -0800 Subject: b44: GFP_DMA skb should not escape from driver b44 chip has some hardware limitations, that need GFP_DMA bounce buffers in some situations. 
In order to not deplete DMA zone, we should keep allocated GFP_DMA skb only for driver use. At rx time, we copy such skb to newly allocated skb, reusing existing copybreak infrastructure. On machines with low amount of memory, all skb meet the hardware limitation, so no copy is needed. We detect this situation using a new device flag, set to one if one GFP_DMA skb was ever allocated by b44_alloc_rx_skb(). Previously allocated skb, even outside from DMA zone will then be recycled, to have minimal impact on DMA zone use. Signed-off-by: Eric Dumazet Tested-by: Ionut Leonte Signed-off-by: David S. Miller --- drivers/net/b44.c | 4 +++- drivers/net/b44.h | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/b44.c b/drivers/net/b44.c index 5ae131c147f..c38512ebcea 100644 --- a/drivers/net/b44.c +++ b/drivers/net/b44.c @@ -679,6 +679,7 @@ static int b44_alloc_rx_skb(struct b44 *bp, int src_idx, u32 dest_idx_unmasked) dev_kfree_skb_any(skb); return -ENOMEM; } + bp->force_copybreak = 1; } rh = (struct rx_header *) skb->data; @@ -800,7 +801,7 @@ static int b44_rx(struct b44 *bp, int budget) /* Omit CRC. 
*/ len -= 4; - if (len > RX_COPY_THRESHOLD) { + if (!bp->force_copybreak && len > RX_COPY_THRESHOLD) { int skb_size; skb_size = b44_alloc_rx_skb(bp, cons, bp->rx_prod); if (skb_size < 0) @@ -2152,6 +2153,7 @@ static int __devinit b44_init_one(struct ssb_device *sdev, bp = netdev_priv(dev); bp->sdev = sdev; bp->dev = dev; + bp->force_copybreak = 0; bp->msg_enable = netif_msg_init(b44_debug, B44_DEF_MSG_ENABLE); diff --git a/drivers/net/b44.h b/drivers/net/b44.h index 7db0c84a795..e678498de6d 100644 --- a/drivers/net/b44.h +++ b/drivers/net/b44.h @@ -395,7 +395,7 @@ struct b44 { u32 rx_pending; u32 tx_pending; u8 phy_addr; - + u8 force_copybreak; struct mii_if_info mii_if; }; -- cgit From b5db0e38653bfada34a92f360b4111566ede3842 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 15 Jan 2009 15:32:12 -0800 Subject: Revert "x86 PAT: remove CPA WARN_ON for zero pte" This reverts commit 58dab916dfb57328d50deb0aa9b3fc92efa248ff, which makes my Nehalem come to a nasty crawling almost-halt. It looks like it turns off caching of regular kernel RAM, with the understandable slowdown of a few orders of magnitude as a result. Acked-by: Ingo Molnar Cc: Yinghai Lu Cc: Peter Anvin Cc: Venkatesh Pallipadi Cc: Suresh Siddha Signed-off-by: Linus Torvalds --- arch/x86/mm/pageattr.c | 10 ++++------ arch/x86/mm/pat.c | 45 +++++++++++++-------------------------------- 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 4cf30dee816..e89d24815f2 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -555,12 +555,10 @@ repeat: if (!pte_val(old_pte)) { if (!primary) return 0; - - /* - * Special error value returned, indicating that the mapping - * did not exist at this address. - */ - return -EFAULT; + WARN(1, KERN_WARNING "CPA: called for zero pte. 
" + "vaddr = %lx cpa->vaddr = %lx\n", address, + *cpa->vaddr); + return -EINVAL; } if (level == PG_LEVEL_4K) { diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 160c42d3eb8..8b08fb95527 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -505,35 +505,6 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif /* CONFIG_STRICT_DEVMEM */ -/* - * Change the memory type for the physial address range in kernel identity - * mapping space if that range is a part of identity map. - */ -static int kernel_map_sync_memtype(u64 base, unsigned long size, - unsigned long flags) -{ - unsigned long id_sz; - int ret; - - if (!pat_enabled || base >= __pa(high_memory)) - return 0; - - id_sz = (__pa(high_memory) < base + size) ? - __pa(high_memory) - base : - size; - - ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags); - /* - * -EFAULT return means that the addr was not valid and did not have - * any identity mapping. That case is a success for - * kernel_map_sync_memtype. 
- */ - if (ret == -EFAULT) - ret = 0; - - return ret; -} - int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { @@ -584,7 +555,9 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (retval < 0) return 0; - if (kernel_map_sync_memtype(offset, size, flags)) { + if (((pfn < max_low_pfn_mapped) || + (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", @@ -632,7 +605,7 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, int strict_prot) { int is_ram = 0; - int ret; + int id_sz, ret; unsigned long flags; unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); @@ -673,7 +646,15 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot, flags); } - if (kernel_map_sync_memtype(paddr, size, flags)) { + /* Need to keep identity mapping in sync */ + if (paddr >= __pa(high_memory)) + return 0; + + id_sz = (__pa(high_memory) < paddr + size) ? + __pa(high_memory) - paddr : + size; + + if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { free_memtype(paddr, paddr + size); printk(KERN_ERR "%s:%d reserve_pfn_range ioremap_change_attr failed %s " -- cgit From db0fb1848a645b0b1b033765f3a5244e7afd2e3c Mon Sep 17 00:00:00 2001 From: Peter W Morreale Date: Thu, 15 Jan 2009 13:50:42 -0800 Subject: Update of Documentation: vm.txt and proc.txt Update Documentation/sysctl/vm.txt and Documentation/filesystems/proc.txt. More specifically, the section on /proc/sys/vm in Documentation/filesystems/proc.txt was removed and a link to Documentation/sysctl/vm.txt added. Most of the verbiage from proc.txt was simply moved in vm.txt, with new addtional text for "swappiness" and "stat_interval". 
Signed-off-by: Peter W Morreale Acked-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 288 +---------------- Documentation/sysctl/vm.txt | 619 ++++++++++++++++++++++++++----------- 2 files changed, 437 insertions(+), 470 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d105eb45282..bbebc3a43ac 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1371,292 +1371,8 @@ auto_msgmni default value is 1. 2.4 /proc/sys/vm - The virtual memory subsystem ----------------------------------------------- -The files in this directory can be used to tune the operation of the virtual -memory (VM) subsystem of the Linux kernel. - -vfs_cache_pressure ------------------- - -Controls the tendency of the kernel to reclaim the memory which is used for -caching of directory and inode objects. - -At the default value of vfs_cache_pressure=100 the kernel will attempt to -reclaim dentries and inodes at a "fair" rate with respect to pagecache and -swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer -to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 -causes the kernel to prefer to reclaim dentries and inodes. - -dirty_background_bytes ----------------------- - -Contains the amount of dirty memory at which the pdflush background writeback -daemon will start writeback. - -If dirty_background_bytes is written, dirty_background_ratio becomes a function -of its value (dirty_background_bytes / the amount of dirtyable system memory). - -dirty_background_ratio ----------------------- - -Contains, as a percentage of the dirtyable system memory (free pages + mapped -pages + file cache, not including locked pages and HugePages), the number of -pages at which the pdflush background writeback daemon will start writing out -dirty data. 
- -If dirty_background_ratio is written, dirty_background_bytes becomes a function -of its value (dirty_background_ratio * the amount of dirtyable system memory). - -dirty_bytes ------------ - -Contains the amount of dirty memory at which a process generating disk writes -will itself start writeback. - -If dirty_bytes is written, dirty_ratio becomes a function of its value -(dirty_bytes / the amount of dirtyable system memory). - -dirty_ratio ------------ - -Contains, as a percentage of the dirtyable system memory (free pages + mapped -pages + file cache, not including locked pages and HugePages), the number of -pages at which a process which is generating disk writes will itself start -writing out dirty data. - -If dirty_ratio is written, dirty_bytes becomes a function of its value -(dirty_ratio * the amount of dirtyable system memory). - -dirty_writeback_centisecs -------------------------- - -The pdflush writeback daemons will periodically wake up and write `old' data -out to disk. This tunable expresses the interval between those wakeups, in -100'ths of a second. - -Setting this to zero disables periodic writeback altogether. - -dirty_expire_centisecs ----------------------- - -This tunable is used to define when dirty data is old enough to be eligible -for writeout by the pdflush daemons. It is expressed in 100'ths of a second. -Data which has been dirty in-memory for longer than this interval will be -written out next time a pdflush daemon wakes up. - -highmem_is_dirtyable --------------------- - -Only present if CONFIG_HIGHMEM is set. - -This defaults to 0 (false), meaning that the ratios set above are calculated -as a percentage of lowmem only. This protects against excessive scanning -in page reclaim, swapping and general VM distress. - -Setting this to 1 can be useful on 32 bit machines where you want to make -random changes within an MMAPed file that is larger than your available -lowmem without causing large quantities of random IO. 
Is is safe if the -behavior of all programs running on the machine is known and memory will -not be otherwise stressed. - -legacy_va_layout ----------------- - -If non-zero, this sysctl disables the new 32-bit mmap mmap layout - the kernel -will use the legacy (2.4) layout for all processes. - -lowmem_reserve_ratio ---------------------- - -For some specialised workloads on highmem machines it is dangerous for -the kernel to allow process memory to be allocated from the "lowmem" -zone. This is because that memory could then be pinned via the mlock() -system call, or by unavailability of swapspace. - -And on large highmem machines this lack of reclaimable lowmem memory -can be fatal. - -So the Linux page allocator has a mechanism which prevents allocations -which _could_ use highmem from using too much lowmem. This means that -a certain amount of lowmem is defended from the possibility of being -captured into pinned user memory. - -(The same argument applies to the old 16 megabyte ISA DMA region. This -mechanism will also defend that region from allocations which could use -highmem or lowmem). - -The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is -in defending these lower zones. - -If you have a machine which uses highmem or ISA DMA and your -applications are using mlock(), or if you are running with no swap then -you probably should change the lowmem_reserve_ratio setting. - -The lowmem_reserve_ratio is an array. You can see them by reading this file. -- -% cat /proc/sys/vm/lowmem_reserve_ratio -256 256 32 -- -Note: # of this elements is one fewer than number of zones. Because the highest - zone's value is not necessary for following calculation. - -But, these values are not used directly. The kernel calculates # of protection -pages for each zones from them. These are shown as array of protection pages -in /proc/zoneinfo like followings. (This is an example of x86-64 box). -Each zone has an array of protection pages like this. 
- -- -Node 0, zone DMA - pages free 1355 - min 3 - low 3 - high 4 - : - : - numa_other 0 - protection: (0, 2004, 2004, 2004) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - pagesets - cpu: 0 pcp: 0 - : -- -These protections are added to score to judge whether this zone should be used -for page allocation or should be reclaimed. - -In this example, if normal pages (index=2) are required to this DMA zone and -pages_high is used for watermark, the kernel judges this zone should not be -used because pages_free(1355) is smaller than watermark + protection[2] -(4 + 2004 = 2008). If this protection value is 0, this zone would be used for -normal page requirement. If requirement is DMA zone(index=0), protection[0] -(=0) is used. - -zone[i]'s protection[j] is calculated by following expression. - -(i < j): - zone[i]->protection[j] - = (total sums of present_pages from zone[i+1] to zone[j] on the node) - / lowmem_reserve_ratio[i]; -(i = j): - (should not be protected. = 0; -(i > j): - (not necessary, but looks 0) - -The default values of lowmem_reserve_ratio[i] are - 256 (if zone[i] means DMA or DMA32 zone) - 32 (others). -As above expression, they are reciprocal number of ratio. -256 means 1/256. # of protection pages becomes about "0.39%" of total present -pages of higher zones on the node. - -If you would like to protect more pages, smaller values are effective. -The minimum value is 1 (1/1 -> 100%). - -page-cluster ------------- - -page-cluster controls the number of pages which are written to swap in -a single attempt. The swap I/O size. - -It is a logarithmic value - setting it to zero means "1 page", setting -it to 1 means "2 pages", setting it to 2 means "4 pages", etc. - -The default value is three (eight pages at a time). There may be some -small benefits in tuning this to a different value if your workload is -swap-intensive. 
- -overcommit_memory ------------------ - -Controls overcommit of system memory, possibly allowing processes -to allocate (but not use) more memory than is actually available. - - -0 - Heuristic overcommit handling. Obvious overcommits of - address space are refused. Used for a typical system. It - ensures a seriously wild allocation fails while allowing - overcommit to reduce swap usage. root is allowed to - allocate slightly more memory in this mode. This is the - default. - -1 - Always overcommit. Appropriate for some scientific - applications. - -2 - Don't overcommit. The total address space commit - for the system is not permitted to exceed swap plus a - configurable percentage (default is 50) of physical RAM. - Depending on the percentage you use, in most situations - this means a process will not be killed while attempting - to use already-allocated memory but will receive errors - on memory allocation as appropriate. - -overcommit_ratio ----------------- - -Percentage of physical memory size to include in overcommit calculations -(see above.) - -Memory allocation limit = swapspace + physmem * (overcommit_ratio / 100) - - swapspace = total size of all swap areas - physmem = size of physical memory in system - -nr_hugepages and hugetlb_shm_group ----------------------------------- - -nr_hugepages configures number of hugetlb page reserved for the system. - -hugetlb_shm_group contains group id that is allowed to create SysV shared -memory segment using hugetlb page. - -hugepages_treat_as_movable --------------------------- - -This parameter is only useful when kernelcore= is specified at boot time to -create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages -are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero -value written to hugepages_treat_as_movable allows huge pages to be allocated -from ZONE_MOVABLE. 
- -Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge -pages pool can easily grow or shrink within. Assuming that applications are -not running that mlock() a lot of memory, it is likely the huge pages pool -can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value -into nr_hugepages and triggering page reclaim. - -laptop_mode ------------ - -laptop_mode is a knob that controls "laptop mode". All the things that are -controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt. - -block_dump ----------- - -block_dump enables block I/O debugging when set to a nonzero value. More -information on block I/O debugging is in Documentation/laptops/laptop-mode.txt. - -swap_token_timeout ------------------- - -This file contains valid hold time of swap out protection token. The Linux -VM has token based thrashing control mechanism and uses the token to prevent -unnecessary page faults in thrashing situation. The unit of the value is -second. The value would be useful to tune thrashing behavior. - -drop_caches ------------ - -Writing to this will cause the kernel to drop clean caches, dentries and -inodes from memory, causing that memory to become free. - -To free pagecache: - echo 1 > /proc/sys/vm/drop_caches -To free dentries and inodes: - echo 2 > /proc/sys/vm/drop_caches -To free pagecache, dentries and inodes: - echo 3 > /proc/sys/vm/drop_caches - -As this is a non-destructive operation and dirty objects are not freeable, the -user should run `sync' first. +Please see: Documentation/sysctl/vm.txt for a description of these +entries. 
2.5 /proc/sys/dev - Device specific parameters diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index a3415070bca..3197fc83bc5 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -1,12 +1,13 @@ -Documentation for /proc/sys/vm/* kernel version 2.2.10 +Documentation for /proc/sys/vm/* kernel version 2.6.29 (c) 1998, 1999, Rik van Riel + (c) 2008 Peter W. Morreale For general info and legal blurb, please look in README. ============================================================== This file contains the documentation for the sysctl files in -/proc/sys/vm and is valid for Linux kernel version 2.2. +/proc/sys/vm and is valid for Linux kernel version 2.6.29. The files in this directory can be used to tune the operation of the virtual memory (VM) subsystem of the Linux kernel and @@ -16,180 +17,274 @@ Default values and initialization routines for most of these files can be found in mm/swap.c. Currently, these files are in /proc/sys/vm: -- overcommit_memory -- page-cluster -- dirty_ratio + +- block_dump +- dirty_background_bytes - dirty_background_ratio +- dirty_bytes - dirty_expire_centisecs +- dirty_ratio - dirty_writeback_centisecs -- highmem_is_dirtyable (only if CONFIG_HIGHMEM set) +- drop_caches +- hugepages_treat_as_movable +- hugetlb_shm_group +- laptop_mode +- legacy_va_layout +- lowmem_reserve_ratio - max_map_count - min_free_kbytes -- laptop_mode -- block_dump -- drop-caches -- zone_reclaim_mode -- min_unmapped_ratio - min_slab_ratio -- panic_on_oom -- oom_dump_tasks -- oom_kill_allocating_task -- mmap_min_address -- numa_zonelist_order +- min_unmapped_ratio +- mmap_min_addr - nr_hugepages - nr_overcommit_hugepages -- nr_trim_pages (only if CONFIG_MMU=n) +- nr_pdflush_threads +- nr_trim_pages (only if CONFIG_MMU=n) +- numa_zonelist_order +- oom_dump_tasks +- oom_kill_allocating_task +- overcommit_memory +- overcommit_ratio +- page-cluster +- panic_on_oom +- percpu_pagelist_fraction +- stat_interval +- swappiness 
+- vfs_cache_pressure +- zone_reclaim_mode + ============================================================== -dirty_bytes, dirty_ratio, dirty_background_bytes, -dirty_background_ratio, dirty_expire_centisecs, -dirty_writeback_centisecs, highmem_is_dirtyable, -vfs_cache_pressure, laptop_mode, block_dump, swap_token_timeout, -drop-caches, hugepages_treat_as_movable: +block_dump -See Documentation/filesystems/proc.txt +block_dump enables block I/O debugging when set to a nonzero value. More +information on block I/O debugging is in Documentation/laptops/laptop-mode.txt. ============================================================== -overcommit_memory: +dirty_background_bytes -This value contains a flag that enables memory overcommitment. +Contains the amount of dirty memory at which the pdflush background writeback +daemon will start writeback. -When this flag is 0, the kernel attempts to estimate the amount -of free memory left when userspace requests more memory. +If dirty_background_bytes is written, dirty_background_ratio becomes a function +of its value (dirty_background_bytes / the amount of dirtyable system memory). -When this flag is 1, the kernel pretends there is always enough -memory until it actually runs out. +============================================================== -When this flag is 2, the kernel uses a "never overcommit" -policy that attempts to prevent any overcommit of memory. +dirty_background_ratio -This feature can be very useful because there are a lot of -programs that malloc() huge amounts of memory "just-in-case" -and don't use much of it. +Contains, as a percentage of total system memory, the number of pages at which +the pdflush background writeback daemon will start writing out dirty data. -The default value is 0. +============================================================== -See Documentation/vm/overcommit-accounting and -security/commoncap.c::cap_vm_enough_memory() for more information. 
+dirty_bytes + +Contains the amount of dirty memory at which a process generating disk writes +will itself start writeback. + +If dirty_bytes is written, dirty_ratio becomes a function of its value +(dirty_bytes / the amount of dirtyable system memory). ============================================================== -overcommit_ratio: +dirty_expire_centisecs -When overcommit_memory is set to 2, the committed address -space is not permitted to exceed swap plus this percentage -of physical RAM. See above. +This tunable is used to define when dirty data is old enough to be eligible +for writeout by the pdflush daemons. It is expressed in 100'ths of a second. +Data which has been dirty in-memory for longer than this interval will be +written out next time a pdflush daemon wakes up. + +============================================================== + +dirty_ratio + +Contains, as a percentage of total system memory, the number of pages at which +a process which is generating disk writes will itself start writing out dirty +data. ============================================================== -page-cluster: +dirty_writeback_centisecs -The Linux VM subsystem avoids excessive disk seeks by reading -multiple pages on a page fault. The number of pages it reads -is dependent on the amount of memory in your machine. +The pdflush writeback daemons will periodically wake up and write `old' data +out to disk. This tunable expresses the interval between those wakeups, in +100'ths of a second. -The number of pages the kernel reads in at once is equal to -2 ^ page-cluster. Values above 2 ^ 5 don't make much sense -for swap because we only cluster swap data in 32-page groups. +Setting this to zero disables periodic writeback altogether. ============================================================== -max_map_count: +drop_caches -This file contains the maximum number of memory map areas a process -may have. 
Memory map areas are used as a side-effect of calling -malloc, directly by mmap and mprotect, and also when loading shared -libraries. +Writing to this will cause the kernel to drop clean caches, dentries and +inodes from memory, causing that memory to become free. -While most applications need less than a thousand maps, certain -programs, particularly malloc debuggers, may consume lots of them, -e.g., up to one or two maps per allocation. +To free pagecache: + echo 1 > /proc/sys/vm/drop_caches +To free dentries and inodes: + echo 2 > /proc/sys/vm/drop_caches +To free pagecache, dentries and inodes: + echo 3 > /proc/sys/vm/drop_caches -The default value is 65536. +As this is a non-destructive operation and dirty objects are not freeable, the +user should run `sync' first. ============================================================== -min_free_kbytes: +hugepages_treat_as_movable -This is used to force the Linux VM to keep a minimum number -of kilobytes free. The VM uses this number to compute a pages_min -value for each lowmem zone in the system. Each lowmem zone gets -a number of reserved free pages based proportionally on its size. +This parameter is only useful when kernelcore= is specified at boot time to +create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages +are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero +value written to hugepages_treat_as_movable allows huge pages to be allocated +from ZONE_MOVABLE. -Some minimal amount of memory is needed to satisfy PF_MEMALLOC -allocations; if you set this to lower than 1024KB, your system will -become subtly broken, and prone to deadlock under high loads. - -Setting this too high will OOM your machine instantly. +Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge +pages pool can easily grow or shrink within. 
Assuming that applications are +not running that mlock() a lot of memory, it is likely the huge pages pool +can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value +into nr_hugepages and triggering page reclaim. ============================================================== -percpu_pagelist_fraction +hugetlb_shm_group -This is the fraction of pages at most (high mark pcp->high) in each zone that -are allocated for each per cpu page list. The min value for this is 8. It -means that we don't allow more than 1/8th of pages in each zone to be -allocated in any single per_cpu_pagelist. This entry only changes the value -of hot per cpu pagelists. User can specify a number like 100 to allocate -1/100th of each zone to each per cpu page list. +hugetlb_shm_group contains group id that is allowed to create SysV +shared memory segment using hugetlb page. -The batch value of each per cpu pagelist is also updated as a result. It is -set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) +============================================================== -The initial value is zero. Kernel does not use this value at boot time to set -the high water marks for each per cpu page list. +laptop_mode -=============================================================== +laptop_mode is a knob that controls "laptop mode". All the things that are +controlled by this knob are discussed in Documentation/laptops/laptop-mode.txt. -zone_reclaim_mode: +============================================================== -Zone_reclaim_mode allows someone to set more or less aggressive approaches to -reclaim memory when a zone runs out of memory. If it is set to zero then no -zone reclaim occurs. Allocations will be satisfied from other zones / nodes -in the system. +legacy_va_layout -This is value ORed together of +If non-zero, this sysctl disables the new 32-bit mmap layout - the kernel +will use the legacy (2.4) layout for all processes. 
-1 = Zone reclaim on -2 = Zone reclaim writes dirty pages out -4 = Zone reclaim swaps pages +============================================================== -zone_reclaim_mode is set during bootup to 1 if it is determined that pages -from remote zones will cause a measurable performance reduction. The -page allocator will then reclaim easily reusable pages (those page -cache pages that are currently not used) before allocating off node pages. +lowmem_reserve_ratio + +For some specialised workloads on highmem machines it is dangerous for +the kernel to allow process memory to be allocated from the "lowmem" +zone. This is because that memory could then be pinned via the mlock() +system call, or by unavailability of swapspace. + +And on large highmem machines this lack of reclaimable lowmem memory +can be fatal. + +So the Linux page allocator has a mechanism which prevents allocations +which _could_ use highmem from using too much lowmem. This means that +a certain amount of lowmem is defended from the possibility of being +captured into pinned user memory. + +(The same argument applies to the old 16 megabyte ISA DMA region. This +mechanism will also defend that region from allocations which could use +highmem or lowmem). + +The `lowmem_reserve_ratio' tunable determines how aggressive the kernel is +in defending these lower zones. + +If you have a machine which uses highmem or ISA DMA and your +applications are using mlock(), or if you are running with no swap then +you probably should change the lowmem_reserve_ratio setting. + +The lowmem_reserve_ratio is an array. You can see them by reading this file. +- +% cat /proc/sys/vm/lowmem_reserve_ratio +256 256 32 +- +Note: # of this elements is one fewer than number of zones. Because the highest + zone's value is not necessary for following calculation. + +But, these values are not used directly. The kernel calculates # of protection +pages for each zones from them. 
These are shown as array of protection pages +in /proc/zoneinfo like followings. (This is an example of x86-64 box). +Each zone has an array of protection pages like this. + +- +Node 0, zone DMA + pages free 1355 + min 3 + low 3 + high 4 + : + : + numa_other 0 + protection: (0, 2004, 2004, 2004) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + pagesets + cpu: 0 pcp: 0 + : +- +These protections are added to score to judge whether this zone should be used +for page allocation or should be reclaimed. + +In this example, if normal pages (index=2) are required to this DMA zone and +pages_high is used for watermark, the kernel judges this zone should not be +used because pages_free(1355) is smaller than watermark + protection[2] +(4 + 2004 = 2008). If this protection value is 0, this zone would be used for +normal page requirement. If requirement is DMA zone(index=0), protection[0] +(=0) is used. + +zone[i]'s protection[j] is calculated by following expression. + +(i < j): + zone[i]->protection[j] + = (total sums of present_pages from zone[i+1] to zone[j] on the node) + / lowmem_reserve_ratio[i]; +(i = j): + (should not be protected. = 0; +(i > j): + (not necessary, but looks 0) + +The default values of lowmem_reserve_ratio[i] are + 256 (if zone[i] means DMA or DMA32 zone) + 32 (others). +As above expression, they are reciprocal number of ratio. +256 means 1/256. # of protection pages becomes about "0.39%" of total present +pages of higher zones on the node. + +If you would like to protect more pages, smaller values are effective. +The minimum value is 1 (1/1 -> 100%). -It may be beneficial to switch off zone reclaim if the system is -used for a file server and all of memory should be used for caching files -from disk. In that case the caching effect is more important than -data locality. 
+============================================================== -Allowing zone reclaim to write out pages stops processes that are -writing large amounts of data from dirtying pages on other nodes. Zone -reclaim will write out dirty pages if a zone fills up and so effectively -throttle the process. This may decrease the performance of a single process -since it cannot use all of system memory to buffer the outgoing writes -anymore but it preserve the memory on other nodes so that the performance -of other processes running on other nodes will not be affected. +max_map_count: -Allowing regular swap effectively restricts allocations to the local -node unless explicitly overridden by memory policies or cpuset -configurations. +This file contains the maximum number of memory map areas a process +may have. Memory map areas are used as a side-effect of calling +malloc, directly by mmap and mprotect, and also when loading shared +libraries. -============================================================= +While most applications need less than a thousand maps, certain +programs, particularly malloc debuggers, may consume lots of them, +e.g., up to one or two maps per allocation. -min_unmapped_ratio: +The default value is 65536. -This is available only on NUMA kernels. +============================================================== -A percentage of the total pages in each zone. Zone reclaim will only -occur if more than this percentage of pages are file backed and unmapped. -This is to insure that a minimal amount of local pages is still available for -file I/O even if the node is overallocated. +min_free_kbytes: -The default is 1 percent. +This is used to force the Linux VM to keep a minimum number +of kilobytes free. The VM uses this number to compute a pages_min +value for each lowmem zone in the system. Each lowmem zone gets +a number of reserved free pages based proportionally on its size. 
+ +Some minimal amount of memory is needed to satisfy PF_MEMALLOC +allocations; if you set this to lower than 1024KB, your system will +become subtly broken, and prone to deadlock under high loads. + +Setting this too high will OOM your machine instantly. ============================================================= @@ -211,82 +306,73 @@ and may not be fast. ============================================================= -panic_on_oom +min_unmapped_ratio: -This enables or disables panic on out-of-memory feature. +This is available only on NUMA kernels. -If this is set to 0, the kernel will kill some rogue process, -called oom_killer. Usually, oom_killer can kill rogue processes and -system will survive. +A percentage of the total pages in each zone. Zone reclaim will only +occur if more than this percentage of pages are file backed and unmapped. +This is to insure that a minimal amount of local pages is still available for +file I/O even if the node is overallocated. -If this is set to 1, the kernel panics when out-of-memory happens. -However, if a process limits using nodes by mempolicy/cpusets, -and those nodes become memory exhaustion status, one process -may be killed by oom-killer. No panic occurs in this case. -Because other nodes' memory may be free. This means system total status -may be not fatal yet. +The default is 1 percent. -If this is set to 2, the kernel panics compulsorily even on the -above-mentioned. +============================================================== -The default value is 0. -1 and 2 are for failover of clustering. Please select either -according to your policy of failover. +mmap_min_addr -============================================================= +This file indicates the amount of address space which a user process will +be restricted from mmaping. Since kernel null dereference bugs could +accidentally operate based on the information in the first couple of pages +of memory userspace processes should not be allowed to write to them. 
By +default this value is set to 0 and no protections will be enforced by the +security module. Setting this value to something like 64k will allow the +vast majority of applications to work correctly and provide defense in depth +against future potential kernel bugs. -oom_dump_tasks +============================================================== -Enables a system-wide task dump (excluding kernel threads) to be -produced when the kernel performs an OOM-killing and includes such -information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and -name. This is helpful to determine why the OOM killer was invoked -and to identify the rogue task that caused it. +nr_hugepages -If this is set to zero, this information is suppressed. On very -large systems with thousands of tasks it may not be feasible to dump -the memory state information for each one. Such systems should not -be forced to incur a performance penalty in OOM conditions when the -information may not be desired. +Change the minimum size of the hugepage pool. -If this is set to non-zero, this information is shown whenever the -OOM killer actually kills a memory-hogging task. +See Documentation/vm/hugetlbpage.txt -The default value is 0. +============================================================== -============================================================= +nr_overcommit_hugepages -oom_kill_allocating_task +Change the maximum size of the hugepage pool. The maximum is +nr_hugepages + nr_overcommit_hugepages. -This enables or disables killing the OOM-triggering task in -out-of-memory situations. +See Documentation/vm/hugetlbpage.txt -If this is set to zero, the OOM killer will scan through the entire -tasklist and select a task based on heuristics to kill. This normally -selects a rogue memory-hogging task that frees up a large amount of -memory when killed. 
+============================================================== -If this is set to non-zero, the OOM killer simply kills the task that -triggered the out-of-memory condition. This avoids the expensive -tasklist scan. +nr_pdflush_threads -If panic_on_oom is selected, it takes precedence over whatever value -is used in oom_kill_allocating_task. +The current number of pdflush threads. This value is read-only. +The value changes according to the number of dirty pages in the system. -The default value is 0. +When necessary, additional pdflush threads are created, one per second, up to +nr_pdflush_threads_max. ============================================================== -mmap_min_addr +nr_trim_pages -This file indicates the amount of address space which a user process will -be restricted from mmaping. Since kernel null dereference bugs could -accidentally operate based on the information in the first couple of pages -of memory userspace processes should not be allowed to write to them. By -default this value is set to 0 and no protections will be enforced by the -security module. Setting this value to something like 64k will allow the -vast majority of applications to work correctly and provide defense in depth -against future potential kernel bugs. +This is available only on NOMMU kernels. + +This value adjusts the excess page trimming behaviour of power-of-2 aligned +NOMMU mmap allocations. + +A value of 0 disables trimming of allocations entirely, while a value of 1 +trims excess pages aggressively. Any value >= 1 acts as the watermark where +trimming of allocations is initiated. + +The default value is 1. + +See Documentation/nommu-mmap.txt for more information. ============================================================== @@ -335,34 +421,199 @@ this is causing problems for your system/application. ============================================================== -nr_hugepages +oom_dump_tasks -Change the minimum size of the hugepage pool.
+Enables a system-wide task dump (excluding kernel threads) to be +produced when the kernel performs an OOM-killing and includes such +information as pid, uid, tgid, vm size, rss, cpu, oom_adj score, and +name. This is helpful to determine why the OOM killer was invoked +and to identify the rogue task that caused it. -See Documentation/vm/hugetlbpage.txt +If this is set to zero, this information is suppressed. On very +large systems with thousands of tasks it may not be feasible to dump +the memory state information for each one. Such systems should not +be forced to incur a performance penalty in OOM conditions when the +information may not be desired. + +If this is set to non-zero, this information is shown whenever the +OOM killer actually kills a memory-hogging task. + +The default value is 0. ============================================================== -nr_overcommit_hugepages +oom_kill_allocating_task -Change the maximum size of the hugepage pool. The maximum is -nr_hugepages + nr_overcommit_hugepages. +This enables or disables killing the OOM-triggering task in +out-of-memory situations. -See Documentation/vm/hugetlbpage.txt +If this is set to zero, the OOM killer will scan through the entire +tasklist and select a task based on heuristics to kill. This normally +selects a rogue memory-hogging task that frees up a large amount of +memory when killed. + +If this is set to non-zero, the OOM killer simply kills the task that +triggered the out-of-memory condition. This avoids the expensive +tasklist scan. + +If panic_on_oom is selected, it takes precedence over whatever value +is used in oom_kill_allocating_task. + +The default value is 0. ============================================================== -nr_trim_pages +overcommit_memory: -This is available only on NOMMU kernels. +This value contains a flag that enables memory overcommitment. -This value adjusts the excess page trimming behaviour of power-of-2 aligned -NOMMU mmap allocations. 
+When this flag is 0, the kernel attempts to estimate the amount +of free memory left when userspace requests more memory. -A value of 0 disables trimming of allocations entirely, while a value of 1 -trims excess pages aggressively. Any value >= 1 acts as the watermark where -trimming of allocations is initiated. +When this flag is 1, the kernel pretends there is always enough +memory until it actually runs out. -The default value is 1. +When this flag is 2, the kernel uses a "never overcommit" +policy that attempts to prevent any overcommit of memory. -See Documentation/nommu-mmap.txt for more information. +This feature can be very useful because there are a lot of +programs that malloc() huge amounts of memory "just-in-case" +and don't use much of it. + +The default value is 0. + +See Documentation/vm/overcommit-accounting and +security/commoncap.c::cap_vm_enough_memory() for more information. + +============================================================== + +overcommit_ratio: + +When overcommit_memory is set to 2, the committed address +space is not permitted to exceed swap plus this percentage +of physical RAM. See above. + +============================================================== + +page-cluster + +page-cluster controls the number of pages which are written to swap in +a single attempt. The swap I/O size. + +It is a logarithmic value - setting it to zero means "1 page", setting +it to 1 means "2 pages", setting it to 2 means "4 pages", etc. + +The default value is three (eight pages at a time). There may be some +small benefits in tuning this to a different value if your workload is +swap-intensive. + +============================================================= + +panic_on_oom + +This enables or disables panic on out-of-memory feature. + +If this is set to 0, the kernel will kill some rogue process, +called oom_killer. Usually, oom_killer can kill rogue processes and +system will survive. 
+ +If this is set to 1, the kernel panics when out-of-memory happens. +However, if a process limits using nodes by mempolicy/cpusets, +and those nodes become memory exhaustion status, one process +may be killed by oom-killer. No panic occurs in this case. +Because other nodes' memory may be free. This means system total status +may be not fatal yet. + +If this is set to 2, the kernel panics compulsorily even on the +above-mentioned. + +The default value is 0. +1 and 2 are for failover of clustering. Please select either +according to your policy of failover. + +============================================================= + +percpu_pagelist_fraction + +This is the fraction of pages at most (high mark pcp->high) in each zone that +are allocated for each per cpu page list. The min value for this is 8. It +means that we don't allow more than 1/8th of pages in each zone to be +allocated in any single per_cpu_pagelist. This entry only changes the value +of hot per cpu pagelists. User can specify a number like 100 to allocate +1/100th of each zone to each per cpu page list. + +The batch value of each per cpu pagelist is also updated as a result. It is +set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8) + +The initial value is zero. Kernel does not use this value at boot time to set +the high water marks for each per cpu page list. + +============================================================== + +stat_interval + +The time interval between which vm statistics are updated. The default +is 1 second. + +============================================================== + +swappiness + +This control is used to define how aggressive the kernel will swap +memory pages. Higher values will increase aggressiveness, lower values +decrease the amount of swap. + +The default value is 60.
+ +============================================================== + +vfs_cache_pressure +------------------ + +Controls the tendency of the kernel to reclaim the memory which is used for +caching of directory and inode objects. + +At the default value of vfs_cache_pressure=100 the kernel will attempt to +reclaim dentries and inodes at a "fair" rate with respect to pagecache and +swapcache reclaim. Decreasing vfs_cache_pressure causes the kernel to prefer +to retain dentry and inode caches. Increasing vfs_cache_pressure beyond 100 +causes the kernel to prefer to reclaim dentries and inodes. + +============================================================== + +zone_reclaim_mode: + +Zone_reclaim_mode allows someone to set more or less aggressive approaches to +reclaim memory when a zone runs out of memory. If it is set to zero then no +zone reclaim occurs. Allocations will be satisfied from other zones / nodes +in the system. + +This is value ORed together of + +1 = Zone reclaim on +2 = Zone reclaim writes dirty pages out +4 = Zone reclaim swaps pages + +zone_reclaim_mode is set during bootup to 1 if it is determined that pages +from remote zones will cause a measurable performance reduction. The +page allocator will then reclaim easily reusable pages (those page +cache pages that are currently not used) before allocating off node pages. + +It may be beneficial to switch off zone reclaim if the system is +used for a file server and all of memory should be used for caching files +from disk. In that case the caching effect is more important than +data locality. + +Allowing zone reclaim to write out pages stops processes that are +writing large amounts of data from dirtying pages on other nodes. Zone +reclaim will write out dirty pages if a zone fills up and so effectively +throttle the process. 
This may decrease the performance of a single process +since it cannot use all of system memory to buffer the outgoing writes +anymore but it preserves the memory on other nodes so that the performance +of other processes running on other nodes will not be affected. + +Allowing regular swap effectively restricts allocations to the local +node unless explicitly overridden by memory policies or cpuset +configurations. + +============ End of Document ================================= -- cgit From 1f14081d8b31b2780e2e6e2ae2766264e1f7d38b Mon Sep 17 00:00:00 2001 From: Itai Levi Date: Thu, 15 Jan 2009 13:50:43 -0800 Subject: atmel_serial: fix flow control bug Fix the following problem, related to hardware flow control (CTS/RTS): Transmitting while CTS line is asserted in DMA mode, due to not checking for tx-stopped condition. We found these problems while testing the UARTs with hardware flow-control. Signed-off-by: Haavard Skinnemoen Cc: "Andrew Victor" Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/serial/atmel_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/serial/atmel_serial.c b/drivers/serial/atmel_serial.c index d5efd6c7790..89362d733d6 100644 --- a/drivers/serial/atmel_serial.c +++ b/drivers/serial/atmel_serial.c @@ -579,7 +579,7 @@ static void atmel_tx_dma(struct uart_port *port) /* disable PDC transmit */ UART_PUT_PTCR(port, ATMEL_PDC_TXTDIS); - if (!uart_circ_empty(xmit)) { + if (!uart_circ_empty(xmit) && !uart_tx_stopped(port)) { dma_sync_single_for_device(port->dev, pdc->dma_addr, pdc->dma_size, -- cgit From 5b96f1729064453e09805a387378e0644da1c937 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 15 Jan 2009 13:50:44 -0800 Subject: atmel_spi: allow transfer when max_speed_hz = 0 For some reason I have to slowdown clock to touchscreen device. In atmel_spi_setup() there is comment that max_speed_hz == 0 means as slow as possible and divider is set to maximum value.
But in atmel_spi_transfer() function is check against not zero max_speed_hz with EINVAL returned. Probably driver should setup divider for each transfer based on transfer->speed_hz value, but I think that would be not necessary overhead as all used devices have constant clock. Below patch works fine for me. Signed-off-by: Stanislaw Gruszka Signed-off-by: Haavard Skinnemoen Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/spi/atmel_spi.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/spi/atmel_spi.c b/drivers/spi/atmel_spi.c index 5e39bac9c51..56ff3e6864e 100644 --- a/drivers/spi/atmel_spi.c +++ b/drivers/spi/atmel_spi.c @@ -670,8 +670,7 @@ static int atmel_spi_transfer(struct spi_device *spi, struct spi_message *msg) dev_dbg(controller, "new message %p submitted for %s\n", msg, spi->dev.bus_id); - if (unlikely(list_empty(&msg->transfers) - || !spi->max_speed_hz)) + if (unlikely(list_empty(&msg->transfers))) return -EINVAL; if (as->stopping) -- cgit From a342d215c206d955fea55d778e3803b29ee41b60 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 15 Jan 2009 13:50:45 -0800 Subject: gpio: fix probe() error return in gpio driver probes A number of drivers in drivers/gpio return -ENODEV when confronted with missing setup parameters such as the platform data. However, returning -ENODEV causes the driver layer to silently ignore the driver as it assumes the probe did not find anything and was only speculative. To make life easier to discern why a driver is not being attached, change to returning -EINVAL, which is a better description of the fact that the driver data was not valid. Also add a set of dev_dbg() statements to the error paths to provide an better explanation of the error as there may be more that one point in the driver. 
Signed-off-by: Ben Dooks Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/max7301.c | 6 ++++-- drivers/gpio/max732x.c | 6 ++++-- drivers/gpio/mcp23s08.c | 6 ++++-- drivers/gpio/pca953x.c | 6 ++++-- drivers/gpio/pcf857x.c | 12 ++++++++---- 5 files changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/gpio/max7301.c b/drivers/gpio/max7301.c index 8b24d784db9..3e7f4e06386 100644 --- a/drivers/gpio/max7301.c +++ b/drivers/gpio/max7301.c @@ -217,8 +217,10 @@ static int __devinit max7301_probe(struct spi_device *spi) int i, ret; pdata = spi->dev.platform_data; - if (!pdata || !pdata->base) - return -ENODEV; + if (!pdata || !pdata->base) { + dev_dbg(&spi->dev, "incorrect or missing platform data\n"); + return -EINVAL; + } /* * bits_per_word cannot be configured in platform data diff --git a/drivers/gpio/max732x.c b/drivers/gpio/max732x.c index 55ae9a41897..f7868243af8 100644 --- a/drivers/gpio/max732x.c +++ b/drivers/gpio/max732x.c @@ -267,8 +267,10 @@ static int __devinit max732x_probe(struct i2c_client *client, int ret, nr_port; pdata = client->dev.platform_data; - if (pdata == NULL) - return -ENODEV; + if (pdata == NULL) { + dev_dbg(&client->dev, "no platform data\n"); + return -EINVAL; + } chip = kzalloc(sizeof(struct max732x_chip), GFP_KERNEL); if (chip == NULL) diff --git a/drivers/gpio/mcp23s08.c b/drivers/gpio/mcp23s08.c index 89c1d222e9d..f6fae0e50e6 100644 --- a/drivers/gpio/mcp23s08.c +++ b/drivers/gpio/mcp23s08.c @@ -310,8 +310,10 @@ static int mcp23s08_probe(struct spi_device *spi) unsigned base; pdata = spi->dev.platform_data; - if (!pdata || !gpio_is_valid(pdata->base)) - return -ENODEV; + if (!pdata || !gpio_is_valid(pdata->base)) { + dev_dbg(&spi->dev, "invalid or missing platform data\n"); + return -EINVAL; + } for (addr = 0; addr < 4; addr++) { if (!pdata->chip[addr].is_present) diff --git a/drivers/gpio/pca953x.c b/drivers/gpio/pca953x.c index 37f35388a2a..8dc0164bd51 100644 --- 
a/drivers/gpio/pca953x.c +++ b/drivers/gpio/pca953x.c @@ -202,8 +202,10 @@ static int __devinit pca953x_probe(struct i2c_client *client, int ret; pdata = client->dev.platform_data; - if (pdata == NULL) - return -ENODEV; + if (pdata == NULL) { + dev_dbg(&client->dev, "no platform data\n"); + return -EINVAL; + } chip = kzalloc(sizeof(struct pca953x_chip), GFP_KERNEL); if (chip == NULL) diff --git a/drivers/gpio/pcf857x.c b/drivers/gpio/pcf857x.c index 4bc2070dd4a..9525724be73 100644 --- a/drivers/gpio/pcf857x.c +++ b/drivers/gpio/pcf857x.c @@ -188,8 +188,10 @@ static int pcf857x_probe(struct i2c_client *client, int status; pdata = client->dev.platform_data; - if (!pdata) - return -ENODEV; + if (!pdata) { + dev_dbg(&client->dev, "no platform data\n"); + return -EINVAL; + } /* Allocate, initialize, and register this gpio_chip. */ gpio = kzalloc(sizeof *gpio, GFP_KERNEL); @@ -248,8 +250,10 @@ static int pcf857x_probe(struct i2c_client *client, else status = i2c_read_le16(client); - } else - status = -ENODEV; + } else { + dev_dbg(&client->dev, "unsupported number of gpios\n"); + status = -EINVAL; + } if (status < 0) goto fail; -- cgit From b46578ed094122a6b36002c644711cef68aa9c9e Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Thu, 15 Jan 2009 13:50:46 -0800 Subject: video/framebuffer: fix bug: jpegview cannot work on framebuffer device other than 16BPP Force fb_var_screeninfo color format on all Blackfin Framebuffer Drivers. 
Signed-off-by: Michael Hennerich Signed-off-by: Bryan Wu Cc: Mike Frysinger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/bf54x-lq043fb.c | 15 ++++++++++++++- drivers/video/bfin-t350mcqb-fb.c | 15 ++++++++++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/video/bf54x-lq043fb.c b/drivers/video/bf54x-lq043fb.c index 7644ed24956..37e60b1d2ed 100644 --- a/drivers/video/bf54x-lq043fb.c +++ b/drivers/video/bf54x-lq043fb.c @@ -335,7 +335,20 @@ static int bfin_bf54x_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { - if (var->bits_per_pixel != LCD_BPP) { + switch (var->bits_per_pixel) { + case 24:/* TRUECOLOUR, 16m */ + var->red.offset = 16; + var->green.offset = 8; + var->blue.offset = 0; + var->red.length = var->green.length = var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + var->transp.msb_right = 0; + var->red.msb_right = 0; + var->green.msb_right = 0; + var->blue.msb_right = 0; + break; + default: pr_debug("%s: depth not supported: %u BPP\n", __func__, var->bits_per_pixel); return -EINVAL; diff --git a/drivers/video/bfin-t350mcqb-fb.c b/drivers/video/bfin-t350mcqb-fb.c index a9b3ada05d9..2a423d3a2a8 100644 --- a/drivers/video/bfin-t350mcqb-fb.c +++ b/drivers/video/bfin-t350mcqb-fb.c @@ -254,7 +254,20 @@ static int bfin_t350mcqb_fb_check_var(struct fb_var_screeninfo *var, struct fb_info *info) { - if (var->bits_per_pixel != LCD_BPP) { + switch (var->bits_per_pixel) { + case 24:/* TRUECOLOUR, 16m */ + var->red.offset = 0; + var->green.offset = 8; + var->blue.offset = 16; + var->red.length = var->green.length = var->blue.length = 8; + var->transp.offset = 0; + var->transp.length = 0; + var->transp.msb_right = 0; + var->red.msb_right = 0; + var->green.msb_right = 0; + var->blue.msb_right = 0; + break; + default: pr_debug("%s: depth not supported: %u BPP\n", __func__, var->bits_per_pixel); return -EINVAL; -- cgit From 822c18f2e38cbc775792ab65ace4f9198678dec9 Mon Sep 17 
00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:50:48 -0800 Subject: alpha: fix vmalloc breakage On alpha, we have to map some stuff in the VMALLOC space very early in the boot process (to make SRM console callbacks work and so on, see arch/alpha/mm/init.c). For old VM allocator, we just manually placed a vm_struct onto the global vmlist and this worked for ages. Unfortunately, the new allocator isn't aware of this, so it constantly tries to allocate the VM space which is already in use, making vmalloc on alpha defunct. This patch forces KVA to import vmlist entries on init. [akpm@linux-foundation.org: remove unneeded check (per Johannes)] Signed-off-by: Ivan Kokshaysky Cc: Nick Piggin Cc: Johannes Weiner Cc: Richard Henderson Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c5db9a7264d..7e00b280648 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -984,6 +985,8 @@ EXPORT_SYMBOL(vm_map_ram); void __init vmalloc_init(void) { + struct vmap_area *va; + struct vm_struct *tmp; int i; for_each_possible_cpu(i) { @@ -996,6 +999,14 @@ void __init vmalloc_init(void) vbq->nr_dirty = 0; } + /* Import existing vmlist entries. 
*/ + for (tmp = vmlist; tmp; tmp = tmp->next) { + va = alloc_bootmem(sizeof(struct vmap_area)); + va->flags = tmp->flags | VM_VM_AREA; + va->va_start = (unsigned long)tmp->addr; + va->va_end = va->va_start + tmp->size; + __insert_vmap_area(va); + } vmap_initialized = true; } -- cgit From 44c12cb2f564b532971c1474073fcbbb8336242e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Thu, 15 Jan 2009 13:50:49 -0800 Subject: edac: add MAINTAINERS entry for i5400 EDAC driver i5400 EDAC driver were added upstream by those changesets: - 920c8df6ac678fdb8c49a6ce2e47a98e62757d77 "edac: driver for i5400 MCH (Seaburg)" - 8375d4909aee4c18798f373ecf24a79f040f75fc "edac: driver for i5400 MCH (update)" Update MAINTAINERS entry for this file to correspond to the driver maintainer. Signed-off-by: Mauro Carvalho Chehab Cc: Ben Woodard Cc: Doug Thompson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 3f6ef02ad68..73548f8ad0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1581,6 +1581,13 @@ L: bluesmoke-devel@lists.sourceforge.net W: bluesmoke.sourceforge.net S: Maintained +EDAC-I5400 +P: Mauro Carvalho Chehab +M: mchehab@redhat.com +L: bluesmoke-devel@lists.sourceforge.net +W: bluesmoke.sourceforge.net +S: Maintained + EDAC-I82975X P: Ranganathan Desikan P: Arvind R. -- cgit From 89365e264104b52da6a61c4e227bb5a934764fa7 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:50:50 -0800 Subject: sysrq documentation: remove the redundant updated date git is maintaining the last update time much more accuratly than the internal update time. Remove it. 
Signed-off-by: Andy Whitcroft Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysrq.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 10a0263ebb3..265f637b97d 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -1,6 +1,5 @@ Linux Magic System Request Key Hacks Documentation for sysrq.c -Last update: 2007-AUG-04 * What is the magic SysRq key? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- cgit From 47c33d9c1984ae4c5bd1f144024eacc14c5bc0c0 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:50:51 -0800 Subject: sysrq documentation: document why the command header only is shown Document the interactions between loglevel and the sysrq output. Also document how to work round it should output be required on the console. Signed-off-by: Andy Whitcroft Cc: Martin Mares Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sysrq.txt | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt index 265f637b97d..9e592c718af 100644 --- a/Documentation/sysrq.txt +++ b/Documentation/sysrq.txt @@ -210,6 +210,24 @@ within a function called by handle_sysrq, you must be aware that you are in a lock (you are also in an interrupt handler, which means don't sleep!), so you must call __handle_sysrq_nolock instead. +* When I hit a SysRq key combination only the header appears on the console? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Sysrq output is subject to the same console loglevel control as all +other console output. This means that if the kernel was booted 'quiet' +as is common on distro kernels the output may not appear on the actual +console, even though it will appear in the dmesg buffer, and be accessible +via the dmesg command and to the consumers of /proc/kmsg. 
As a specific +exception the header line from the sysrq command is passed to all console +consumers as if the current loglevel was maximum. If only the header +is emitted it is almost certain that the kernel loglevel is too low. +Should you require the output on the console channel then you will need +to temporarily up the console loglevel using alt-sysrq-8 or: + + echo 8 > /proc/sysrq-trigger + +Remember to return the loglevel to normal after triggering the sysrq +command you are interested in. + * I have more questions, who can I ask? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ And I'll answer any questions about the registration system you got, also -- cgit From fb144adc517d9ebe8fd8d98a5696fb68ec91e1f5 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:50:52 -0800 Subject: sysrq: add commentary on why we use the console loglevel over using KERN_EMERG Add an explanitory comment as to why we modify the kernel console loglevel rather than simply moving sysrq messages to KERN_EMERG level. Signed-off-by: Andy Whitcroft Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/sysrq.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index d41b9f6f790..33a9351c896 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -473,6 +473,12 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask) unsigned long flags; spin_lock_irqsave(&sysrq_key_table_lock, flags); + /* + * Raise the apparent loglevel to maximum so that the sysrq header + * is shown to provide the user with positive feedback. We do not + * simply emit this at KERN_EMERG as that would change message + * routing in the consumers of /proc/kmsg. 
+ */ orig_log_level = console_loglevel; console_loglevel = 7; printk(KERN_INFO "SysRq : "); -- cgit From a748384bba1754409383ba9f0738bffdfa3fd431 Mon Sep 17 00:00:00 2001 From: Alessandro Zummo Date: Thu, 15 Jan 2009 13:50:52 -0800 Subject: rtc: tw4030 add alarm/update interfaces - implement alarm_irq_enable - return correct error code when registering fails [dbrownell@users.sourceforge.net: build fixes, force 1/sec irqs] Signed-off-by: Alessandro Zummo Signed-off-by: David Brownell Cc: Tony Lindgren Cc: Samuel Ortiz Cc: rtc-linux@googlegroups.com Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-twl4030.c | 44 ++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c index 8ce5f74ee45..25dc3e1cedb 100644 --- a/drivers/rtc/rtc-twl4030.c +++ b/drivers/rtc/rtc-twl4030.c @@ -120,7 +120,7 @@ static int twl4030_rtc_write_u8(u8 data, u8 reg) static unsigned char rtc_irq_bits; /* - * Enable timer and/or alarm interrupts. + * Enable 1/second update and/or alarm interrupts. */ static int set_rtc_irq_bit(unsigned char bit) { @@ -128,6 +128,7 @@ static int set_rtc_irq_bit(unsigned char bit) int ret; val = rtc_irq_bits | bit; + val &= ~BIT_RTC_INTERRUPTS_REG_EVERY_M; ret = twl4030_rtc_write_u8(val, REG_RTC_INTERRUPTS_REG); if (ret == 0) rtc_irq_bits = val; @@ -136,7 +137,7 @@ static int set_rtc_irq_bit(unsigned char bit) } /* - * Disable timer and/or alarm interrupts. + * Disable update and/or alarm interrupts. 
*/ static int mask_rtc_irq_bit(unsigned char bit) { @@ -151,7 +152,7 @@ static int mask_rtc_irq_bit(unsigned char bit) return ret; } -static inline int twl4030_rtc_alarm_irq_set_state(int enabled) +static int twl4030_rtc_alarm_irq_enable(struct device *dev, unsigned enabled) { int ret; @@ -163,7 +164,7 @@ static inline int twl4030_rtc_alarm_irq_set_state(int enabled) return ret; } -static inline int twl4030_rtc_irq_set_state(int enabled) +static int twl4030_rtc_update_irq_enable(struct device *dev, unsigned enabled) { int ret; @@ -292,7 +293,7 @@ static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) unsigned char alarm_data[ALL_TIME_REGS + 1]; int ret; - ret = twl4030_rtc_alarm_irq_set_state(0); + ret = twl4030_rtc_alarm_irq_enable(dev, 0); if (ret) goto out; @@ -312,35 +313,11 @@ static int twl4030_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm) } if (alm->enabled) - ret = twl4030_rtc_alarm_irq_set_state(1); + ret = twl4030_rtc_alarm_irq_enable(dev, 1); out: return ret; } -#ifdef CONFIG_RTC_INTF_DEV - -static int twl4030_rtc_ioctl(struct device *dev, unsigned int cmd, - unsigned long arg) -{ - switch (cmd) { - case RTC_AIE_OFF: - return twl4030_rtc_alarm_irq_set_state(0); - case RTC_AIE_ON: - return twl4030_rtc_alarm_irq_set_state(1); - case RTC_UIE_OFF: - return twl4030_rtc_irq_set_state(0); - case RTC_UIE_ON: - return twl4030_rtc_irq_set_state(1); - - default: - return -ENOIOCTLCMD; - } -} - -#else -#define twl4030_rtc_ioctl NULL -#endif - static irqreturn_t twl4030_rtc_interrupt(int irq, void *rtc) { unsigned long events = 0; @@ -400,11 +377,12 @@ out: } static struct rtc_class_ops twl4030_rtc_ops = { - .ioctl = twl4030_rtc_ioctl, .read_time = twl4030_rtc_read_time, .set_time = twl4030_rtc_set_time, .read_alarm = twl4030_rtc_read_alarm, .set_alarm = twl4030_rtc_set_alarm, + .alarm_irq_enable = twl4030_rtc_alarm_irq_enable, + .update_irq_enable = twl4030_rtc_update_irq_enable, }; 
/*----------------------------------------------------------------------*/ @@ -422,7 +400,7 @@ static int __devinit twl4030_rtc_probe(struct platform_device *pdev) rtc = rtc_device_register(pdev->name, &pdev->dev, &twl4030_rtc_ops, THIS_MODULE); if (IS_ERR(rtc)) { - ret = -EINVAL; + ret = PTR_ERR(rtc); dev_err(&pdev->dev, "can't register RTC device, err %ld\n", PTR_ERR(rtc)); goto out0; @@ -432,7 +410,6 @@ static int __devinit twl4030_rtc_probe(struct platform_device *pdev) platform_set_drvdata(pdev, rtc); ret = twl4030_rtc_read_u8(&rd_reg, REG_RTC_STATUS_REG); - if (ret < 0) goto out1; @@ -475,7 +452,6 @@ static int __devinit twl4030_rtc_probe(struct platform_device *pdev) return ret; - out2: free_irq(irq, rtc); out1: -- cgit From 4216d0bd8fef77b7926eb065a766e2ee003acf6a Mon Sep 17 00:00:00 2001 From: Antonio Ospite Date: Thu, 15 Jan 2009 13:50:54 -0800 Subject: rtc-pxa: fix build failure Fix these build errors: CC drivers/rtc/rtc-pxa.o drivers/rtc/rtc-pxa.c: In function `pxa_rtc_init': drivers/rtc/rtc-pxa.c:472: error: implicit declaration of function `cpu_is_pxa27x' drivers/rtc/rtc-pxa.c:472: error: implicit declaration of function `cpu_is_pxa3xx' Signed-off-by: Antonio Ospite Signed-off-by: Alessandro Zummo Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-pxa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c index cc7eb8767b8..bd56a033bfd 100644 --- a/drivers/rtc/rtc-pxa.c +++ b/drivers/rtc/rtc-pxa.c @@ -27,6 +27,8 @@ #include #include +#include + #define TIMER_FREQ CLOCK_TICK_RATE #define RTC_DEF_DIVIDER (32768 - 1) #define RTC_DEF_TRIM 0 -- cgit From cafa1d8b0c90252e73f20db1b6f32cf88e73ff17 Mon Sep 17 00:00:00 2001 From: Matti Halme Date: Thu, 15 Jan 2009 13:50:56 -0800 Subject: rtc: rtc-twl4030 don't mask alarm interrupts on shutdown A triggering RTC alarm should be able to power on a device that has been powered off. 
This patch enables that on twl4030 by not masking the alarm interrupt at shutdown. Signed-off-by: Matti Halme Signed-off-by: Alessandro Zummo Acked-by: David Brownell Cc: Tony Lindgren Cc: Samuel Ortiz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/rtc/rtc-twl4030.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/rtc/rtc-twl4030.c b/drivers/rtc/rtc-twl4030.c index 25dc3e1cedb..ad35f76c46b 100644 --- a/drivers/rtc/rtc-twl4030.c +++ b/drivers/rtc/rtc-twl4030.c @@ -482,8 +482,9 @@ static int __devexit twl4030_rtc_remove(struct platform_device *pdev) static void twl4030_rtc_shutdown(struct platform_device *pdev) { - mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M | - BIT_RTC_INTERRUPTS_REG_IT_ALARM_M); + /* mask timer interrupts, but leave alarm interrupts on to enable + power-on when alarm is triggered */ + mask_rtc_irq_bit(BIT_RTC_INTERRUPTS_REG_IT_TIMER_M); } #ifdef CONFIG_PM -- cgit From 158bc69effbf96f59c01cdeb20f8d4c184e59f8e Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Thu, 15 Jan 2009 13:50:57 -0800 Subject: sgi-xp: eliminate false detection of no heartbeat After XPC has been up and running on multiple partitions for any length of time, if XPC on one of the partitions is stopped and restarted (either by a rmmod/insmod or a system restart), it is possible for the XPCs running on the other partitions to falsely detect a lack of heartbeat from the XPC that was just restarted. This false detection will occur if the restarted XPC comes up within the five-seconds preceding one of the other XPC's heartbeat check (which occurs once every twenty seconds). The detection of no heartbeat results in the detecting XPC deactivating from the just restarted XPC. The only remedy is to restart one of the XPCs and hope that one doesn't hit this five-second window on any of the other partitions. 
Signed-off-by: Dean Nelson Signed-off-by: Robin Holt Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/misc/sgi-xp/xpc_sn2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/sgi-xp/xpc_sn2.c b/drivers/misc/sgi-xp/xpc_sn2.c index 73b7fb8de47..82fb9958f22 100644 --- a/drivers/misc/sgi-xp/xpc_sn2.c +++ b/drivers/misc/sgi-xp/xpc_sn2.c @@ -899,7 +899,7 @@ xpc_update_partition_info_sn2(struct xpc_partition *part, u8 remote_rp_version, dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", part_sn2->remote_vars_pa); - part->last_heartbeat = remote_vars->heartbeat; + part->last_heartbeat = remote_vars->heartbeat - 1; dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", part->last_heartbeat); -- cgit From a6b2f87be1aafedc585bad86b77c2e2bbd49b308 Mon Sep 17 00:00:00 2001 From: Paul Fulghum Date: Thu, 15 Jan 2009 13:50:57 -0800 Subject: synclink_gt: enable RI interrupt - Enable ring indicator interrupt. - Remove vendor specific CVS version tags. Signed-off-by: Paul Fulghum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/synclink_gt.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/char/synclink_gt.c b/drivers/char/synclink_gt.c index 53544e21f19..f329f459817 100644 --- a/drivers/char/synclink_gt.c +++ b/drivers/char/synclink_gt.c @@ -1,6 +1,4 @@ /* - * $Id: synclink_gt.c,v 4.50 2007/07/25 19:29:25 paulkf Exp $ - * * Device driver for Microgate SyncLink GT serial adapters. 
* * written by Paul Fulghum for Microgate Corporation @@ -91,7 +89,6 @@ * module identification */ static char *driver_name = "SyncLink GT"; -static char *driver_version = "$Revision: 4.50 $"; static char *tty_driver_name = "synclink_gt"; static char *tty_dev_prefix = "ttySLG"; MODULE_LICENSE("GPL"); @@ -1309,7 +1306,7 @@ static int read_proc(char *page, char **start, off_t off, int count, off_t begin = 0; struct slgt_info *info; - len += sprintf(page, "synclink_gt driver:%s\n", driver_version); + len += sprintf(page, "synclink_gt driver\n"); info = slgt_device_list; while( info ) { @@ -2441,7 +2438,7 @@ static void program_hw(struct slgt_info *info) info->ri_chkcount = 0; info->dsr_chkcount = 0; - slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR); + slgt_irq_on(info, IRQ_DCD | IRQ_CTS | IRQ_DSR | IRQ_RI); get_signals(info); if (info->netcount || @@ -3576,7 +3573,7 @@ static void slgt_cleanup(void) struct slgt_info *info; struct slgt_info *tmp; - printk("unload %s %s\n", driver_name, driver_version); + printk(KERN_INFO "unload %s\n", driver_name); if (serial_driver) { for (info=slgt_device_list ; info != NULL ; info=info->next_device) @@ -3619,7 +3616,7 @@ static int __init slgt_init(void) { int rc; - printk("%s %s\n", driver_name, driver_version); + printk(KERN_INFO "%s\n", driver_name); serial_driver = alloc_tty_driver(MAX_DEVICES); if (!serial_driver) { @@ -3650,9 +3647,8 @@ static int __init slgt_init(void) goto error; } - printk("%s %s, tty major#%d\n", - driver_name, driver_version, - serial_driver->major); + printk(KERN_INFO "%s, tty major#%d\n", + driver_name, serial_driver->major); slgt_device_count = 0; if ((rc = pci_register_driver(&pci_driver)) < 0) { -- cgit From 23964d2d02984d44aeb2d84d7ffb3359e728df43 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:58 -0800 Subject: cgroups: clean up Kconfig - move CONFIG_PROC_PID_CPUSET into cgroup menu - move MM_OWNER to the bottom for better menu indent - fix typos - use tabs not spaces 
Signed-off-by: Li Zefan Acked-by: Paul Menage Acked-by: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- init/Kconfig | 56 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/init/Kconfig b/init/Kconfig index a724a149bf3..56fd93c63c7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -328,11 +328,10 @@ config CGROUP_SCHED endchoice -menu "Control Group support" -config CGROUPS - bool "Control Group support" +menuconfig CGROUPS + boolean "Control Group support" help - This option add support for grouping sets of processes together, for + This option adds support for grouping sets of processes together, for use with process control subsystems such as Cpusets, CFS, memory controls or device isolation. See @@ -343,6 +342,8 @@ config CGROUPS Say N if unsure. +if CGROUPS + config CGROUP_DEBUG bool "Example debug cgroup subsystem" depends on CGROUPS @@ -350,24 +351,24 @@ config CGROUP_DEBUG help This option enables a simple cgroup subsystem that exports useful debugging information about the cgroups - framework + framework. - Say N if unsure + Say N if unsure. config CGROUP_NS - bool "Namespace cgroup subsystem" - depends on CGROUPS - help - Provides a simple namespace cgroup subsystem to - provide hierarchical naming of sets of namespaces, - for instance virtual servers and checkpoint/restart - jobs. + bool "Namespace cgroup subsystem" + depends on CGROUPS + help + Provides a simple namespace cgroup subsystem to + provide hierarchical naming of sets of namespaces, + for instance virtual servers and checkpoint/restart + jobs. config CGROUP_FREEZER - bool "control group freezer subsystem" - depends on CGROUPS - help - Provides a way to freeze and unfreeze all tasks in a + bool "Freezer cgroup subsystem" + depends on CGROUPS + help + Provides a way to freeze and unfreeze all tasks in a cgroup. config CGROUP_DEVICE @@ -388,18 +389,23 @@ config CPUSETS Say N if unsure. 
+config PROC_PID_CPUSET + bool "Include legacy /proc//cpuset file" + depends on CPUSETS + default y + config CGROUP_CPUACCT bool "Simple CPU accounting cgroup subsystem" depends on CGROUPS help Provides a simple Resource Controller for monitoring the - total CPU consumed by the tasks in a cgroup + total CPU consumed by the tasks in a cgroup. config RESOURCE_COUNTERS bool "Resource counters" help This option enables controller independent resource accounting - infrastructure that works with cgroups + infrastructure that works with cgroups. depends on CGROUPS config CGROUP_MEM_RES_CTLR @@ -425,9 +431,6 @@ config CGROUP_MEM_RES_CTLR This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. -config MM_OWNER - bool - config CGROUP_MEM_RES_CTLR_SWAP bool "Memory Resource Controller Swap Extension(EXPERIMENTAL)" depends on CGROUP_MEM_RES_CTLR && SWAP && EXPERIMENTAL @@ -444,8 +447,10 @@ config CGROUP_MEM_RES_CTLR_SWAP there will be no overhead from this. Even when you set this config=y, if boot option "noswapaccount" is set, swap will not be accounted. +endif # CGROUPS -endmenu +config MM_OWNER + bool config SYSFS_DEPRECATED bool @@ -483,11 +488,6 @@ config SYSFS_DEPRECATED_V2 if the original kernel, that came with your distribution, has this option set to N. 
-config PROC_PID_CPUSET - bool "Include legacy /proc//cpuset file" - depends on CPUSETS - default y - config RELAY bool "Kernel->user space relay support (formerly relayfs)" help -- cgit From 45ce80fb6b6f9594d1396d44dd7e7c02d596fef8 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:50:59 -0800 Subject: cgroups: consolidate cgroup documents Move Documentation/cpusets.txt and Documentation/controllers/* to Documentation/cgroups/ Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Acked-by: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/cgroups.txt | 5 +- Documentation/cgroups/cpuacct.txt | 32 + Documentation/cgroups/cpusets.txt | 808 +++++++++++++++++++++++++ Documentation/cgroups/devices.txt | 52 ++ Documentation/cgroups/memcg_test.txt | 342 +++++++++++ Documentation/cgroups/memory.txt | 399 ++++++++++++ Documentation/cgroups/resource_counter.txt | 181 ++++++ Documentation/controllers/cpuacct.txt | 32 - Documentation/controllers/devices.txt | 52 -- Documentation/controllers/memcg_test.txt | 342 ----------- Documentation/controllers/memory.txt | 399 ------------ Documentation/controllers/resource_counter.txt | 181 ------ Documentation/cpusets.txt | 808 ------------------------- Documentation/scheduler/sched-design-CFS.txt | 2 +- include/linux/res_counter.h | 2 +- init/Kconfig | 9 +- kernel/cpuset.c | 2 +- 17 files changed, 1824 insertions(+), 1824 deletions(-) create mode 100644 Documentation/cgroups/cpuacct.txt create mode 100644 Documentation/cgroups/cpusets.txt create mode 100644 Documentation/cgroups/devices.txt create mode 100644 Documentation/cgroups/memcg_test.txt create mode 100644 Documentation/cgroups/memory.txt create mode 100644 Documentation/cgroups/resource_counter.txt delete mode 100644 Documentation/controllers/cpuacct.txt delete mode 100644 Documentation/controllers/devices.txt delete mode 100644 Documentation/controllers/memcg_test.txt delete mode 100644 
Documentation/controllers/memory.txt delete mode 100644 Documentation/controllers/resource_counter.txt delete mode 100644 Documentation/cpusets.txt diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt index e33ee74eee7..d9e5d6f41b9 100644 --- a/Documentation/cgroups/cgroups.txt +++ b/Documentation/cgroups/cgroups.txt @@ -1,7 +1,8 @@ CGROUPS ------- -Written by Paul Menage based on Documentation/cpusets.txt +Written by Paul Menage based on +Documentation/cgroups/cpusets.txt Original copyright statements from cpusets.txt: Portions Copyright (C) 2004 BULL SA. @@ -68,7 +69,7 @@ On their own, the only use for cgroups is for simple job tracking. The intention is that other subsystems hook into the generic cgroup support to provide new attributes for cgroups, such as accounting/limiting the resources which processes in a cgroup can -access. For example, cpusets (see Documentation/cpusets.txt) allows +access. For example, cpusets (see Documentation/cgroups/cpusets.txt) allows you to associate a set of CPUs and a set of memory nodes with the tasks in each cgroup. diff --git a/Documentation/cgroups/cpuacct.txt b/Documentation/cgroups/cpuacct.txt new file mode 100644 index 00000000000..bb775fbe43d --- /dev/null +++ b/Documentation/cgroups/cpuacct.txt @@ -0,0 +1,32 @@ +CPU Accounting Controller +------------------------- + +The CPU accounting controller is used to group tasks using cgroups and +account the CPU usage of these groups of tasks. + +The CPU accounting controller supports multi-hierarchy groups. An accounting +group accumulates the CPU usage of all of its child groups and the tasks +directly present in its group. + +Accounting groups can be created by first mounting the cgroup filesystem. + +# mkdir /cgroups +# mount -t cgroup -ocpuacct none /cgroups + +With the above step, the initial or the parent accounting group +becomes visible at /cgroups. At bootup, this group includes all the +tasks in the system. 
/cgroups/tasks lists the tasks in this cgroup. +/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by +this group which is essentially the CPU time obtained by all the tasks +in the system. + +New accounting groups can be created under the parent group /cgroups. + +# cd /cgroups +# mkdir g1 +# echo $$ > g1/tasks + +The above steps create a new group g1 and move the current shell +process (bash) into it. CPU time consumed by this bash and its children +can be obtained from g1/cpuacct.usage and the same is accumulated in +/cgroups/cpuacct.usage also. diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt new file mode 100644 index 00000000000..5c86c258c79 --- /dev/null +++ b/Documentation/cgroups/cpusets.txt @@ -0,0 +1,808 @@ + CPUSETS + ------- + +Copyright (C) 2004 BULL SA. +Written by Simon.Derr@bull.net + +Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. +Modified by Paul Jackson +Modified by Christoph Lameter +Modified by Paul Menage +Modified by Hidetoshi Seto + +CONTENTS: +========= + +1. Cpusets + 1.1 What are cpusets ? + 1.2 Why are cpusets needed ? + 1.3 How are cpusets implemented ? + 1.4 What are exclusive cpusets ? + 1.5 What is memory_pressure ? + 1.6 What is memory spread ? + 1.7 What is sched_load_balance ? + 1.8 What is sched_relax_domain_level ? + 1.9 How do I use cpusets ? +2. Usage Examples and Syntax + 2.1 Basic Usage + 2.2 Adding/removing cpus + 2.3 Setting flags + 2.4 Attaching processes +3. Questions +4. Contact + +1. Cpusets +========== + +1.1 What are cpusets ? +---------------------- + +Cpusets provide a mechanism for assigning a set of CPUs and Memory +Nodes to a set of tasks. In this document "Memory Node" refers to +an on-line node that contains memory. + +Cpusets constrain the CPU and Memory placement of tasks to only +the resources within a tasks current cpuset. They form a nested +hierarchy visible in a virtual file system.
These are the essential +hooks, beyond what is already present, required to manage dynamic +job placement on large systems. + +Cpusets use the generic cgroup subsystem described in +Documentation/cgroups/cgroups.txt. + +Requests by a task, using the sched_setaffinity(2) system call to +include CPUs in its CPU affinity mask, and using the mbind(2) and +set_mempolicy(2) system calls to include Memory Nodes in its memory +policy, are both filtered through that tasks cpuset, filtering out any +CPUs or Memory Nodes not in that cpuset. The scheduler will not +schedule a task on a CPU that is not allowed in its cpus_allowed +vector, and the kernel page allocator will not allocate a page on a +node that is not allowed in the requesting tasks mems_allowed vector. + +User level code may create and destroy cpusets by name in the cgroup +virtual file system, manage the attributes and permissions of these +cpusets and which CPUs and Memory Nodes are assigned to each cpuset, +specify and query to which cpuset a task is assigned, and list the +task pids assigned to a cpuset. + + +1.2 Why are cpusets needed ? +---------------------------- + +The management of large computer systems, with many processors (CPUs), +complex memory cache hierarchies and multiple Memory Nodes having +non-uniform access times (NUMA) presents additional challenges for +the efficient scheduling and memory placement of processes. + +Frequently more modest sized systems can be operated with adequate +efficiency just by letting the operating system automatically share +the available CPU and Memory resources amongst the requesting tasks. + +But larger systems, which benefit more from careful processor and +memory placement to reduce memory access times and contention, +and which typically represent a larger investment for the customer, +can benefit from explicitly placing jobs on properly sized subsets of +the system. 
+ +This can be especially valuable on: + + * Web Servers running multiple instances of the same web application, + * Servers running different applications (for instance, a web server + and a database), or + * NUMA systems running large HPC applications with demanding + performance characteristics. + +These subsets, or "soft partitions" must be able to be dynamically +adjusted, as the job mix changes, without impacting other concurrently +executing jobs. The location of the running jobs pages may also be moved +when the memory locations are changed. + +The kernel cpuset patch provides the minimum essential kernel +mechanisms required to efficiently implement such subsets. It +leverages existing CPU and Memory Placement facilities in the Linux +kernel to avoid any additional impact on the critical scheduler or +memory allocator code. + + +1.3 How are cpusets implemented ? +--------------------------------- + +Cpusets provide a Linux kernel mechanism to constrain which CPUs and +Memory Nodes are used by a process or set of processes. + +The Linux kernel already has a pair of mechanisms to specify on which +CPUs a task may be scheduled (sched_setaffinity) and on which Memory +Nodes it may obtain memory (mbind, set_mempolicy). + +Cpusets extends these two mechanisms as follows: + + - Cpusets are sets of allowed CPUs and Memory Nodes, known to the + kernel. + - Each task in the system is attached to a cpuset, via a pointer + in the task structure to a reference counted cgroup structure. + - Calls to sched_setaffinity are filtered to just those CPUs + allowed in that tasks cpuset. + - Calls to mbind and set_mempolicy are filtered to just + those Memory Nodes allowed in that tasks cpuset. + - The root cpuset contains all the systems CPUs and Memory + Nodes. + - For any cpuset, one can define child cpusets containing a subset + of the parents CPU and Memory Node resources. 
+ - The hierarchy of cpusets can be mounted at /dev/cpuset, for + browsing and manipulation from user space. + - A cpuset may be marked exclusive, which ensures that no other + cpuset (except direct ancestors and descendents) may contain + any overlapping CPUs or Memory Nodes. + - You can list all the tasks (by pid) attached to any cpuset. + +The implementation of cpusets requires a few, simple hooks +into the rest of the kernel, none in performance critical paths: + + - in init/main.c, to initialize the root cpuset at system boot. + - in fork and exit, to attach and detach a task from its cpuset. + - in sched_setaffinity, to mask the requested CPUs by what's + allowed in that tasks cpuset. + - in sched.c migrate_all_tasks(), to keep migrating tasks within + the CPUs allowed by their cpuset, if possible. + - in the mbind and set_mempolicy system calls, to mask the requested + Memory Nodes by what's allowed in that tasks cpuset. + - in page_alloc.c, to restrict memory to allowed nodes. + - in vmscan.c, to restrict page recovery to the current cpuset. + +You should mount the "cgroup" filesystem type in order to enable +browsing and modifying the cpusets presently known to the kernel. No +new system calls are added for cpusets - all support for querying and +modifying cpusets is via this cpuset file system. 
+ +The /proc/<pid>/status file for each task has four added lines, +displaying the tasks cpus_allowed (on which CPUs it may be scheduled) +and mems_allowed (on which Memory Nodes it may obtain memory), +in the two formats seen in the following example: + + Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff + Cpus_allowed_list: 0-127 + Mems_allowed: ffffffff,ffffffff + Mems_allowed_list: 0-63 + +Each cpuset is represented by a directory in the cgroup file system +containing (on top of the standard cgroup files) the following +files describing that cpuset: + + - cpus: list of CPUs in that cpuset + - mems: list of Memory Nodes in that cpuset + - memory_migrate flag: if set, move pages to cpusets nodes + - cpu_exclusive flag: is cpu placement exclusive? + - mem_exclusive flag: is memory placement exclusive? + - mem_hardwall flag: is memory allocation hardwalled + - memory_pressure: measure of how much paging pressure in cpuset + +In addition, the root cpuset only has the following file: + - memory_pressure_enabled flag: compute memory_pressure? + +New cpusets are created using the mkdir system call or shell +command. The properties of a cpuset, such as its flags, allowed +CPUs and Memory Nodes, and attached tasks, are modified by writing +to the appropriate file in that cpusets directory, as listed above. + +The named hierarchical structure of nested cpusets allows partitioning +a large system into nested, dynamically changeable, "soft-partitions". + +The attachment of each task, automatically inherited at fork by any +children of that task, to a cpuset allows organizing the work load +on a system into related sets of tasks such that each set is constrained +to using the CPUs and Memory Nodes of a particular cpuset. A task +may be re-attached to any other cpuset, if allowed by the permissions +on the necessary cpuset file system directories.
+ +Such management of a system "in the large" integrates smoothly with +the detailed placement done on individual tasks and memory regions +using the sched_setaffinity, mbind and set_mempolicy system calls. + +The following rules apply to each cpuset: + + - Its CPUs and Memory Nodes must be a subset of its parents. + - It can't be marked exclusive unless its parent is. + - If its cpu or memory is exclusive, they may not overlap any sibling. + +These rules, and the natural hierarchy of cpusets, enable efficient +enforcement of the exclusive guarantee, without having to scan all +cpusets every time any of them change to ensure nothing overlaps an +exclusive cpuset. Also, the use of a Linux virtual file system (vfs) +to represent the cpuset hierarchy provides for a familiar permission +and name space for cpusets, with a minimum of additional kernel code. + +The cpus and mems files in the root (top_cpuset) cpuset are +read-only. The cpus file automatically tracks the value of +cpu_online_map using a CPU hotplug notifier, and the mems file +automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e., +nodes with memory--using the cpuset_track_online_nodes() hook. + + +1.4 What are exclusive cpusets ? +-------------------------------- + +If a cpuset is cpu or mem exclusive, no other cpuset, other than +a direct ancestor or descendent, may share any of the same CPUs or +Memory Nodes. + +A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", +i.e. it restricts kernel allocations for page, buffer and other data +commonly shared by the kernel across multiple users. All cpusets, +whether hardwalled or not, restrict allocations of memory for user +space. This enables configuring a system so that several independent +jobs can share common kernel data, such as file system pages, while +isolating each job's user allocation in its own cpuset.
To do this, +construct a large mem_exclusive cpuset to hold all the jobs, and +construct child, non-mem_exclusive cpusets for each individual job. +Only a small amount of typical kernel memory, such as requests from +interrupt handlers, is allowed to be taken outside even a +mem_exclusive cpuset. + + +1.5 What is memory_pressure ? +----------------------------- +The memory_pressure of a cpuset provides a simple per-cpuset metric +of the rate that the tasks in a cpuset are attempting to free up in +use memory on the nodes of the cpuset to satisfy additional memory +requests. + +This enables batch managers monitoring jobs running in dedicated +cpusets to efficiently detect what level of memory pressure that job +is causing. + +This is useful both on tightly managed systems running a wide mix of +submitted jobs, which may choose to terminate or re-prioritize jobs that +are trying to use more memory than allowed on the nodes assigned them, +and with tightly coupled, long running, massively parallel scientific +computing jobs that will dramatically fail to meet required performance +goals if they start to use more memory than allowed to them. + +This mechanism provides a very economical way for the batch manager +to monitor a cpuset for signs of memory pressure. It's up to the +batch manager or other user code to decide what to do about it and +take action. + +==> Unless this feature is enabled by writing "1" to the special file + /dev/cpuset/memory_pressure_enabled, the hook in the rebalance + code of __alloc_pages() for this metric reduces to simply noticing + that the cpuset_memory_pressure_enabled flag is zero. So only + systems that enable this feature will compute the metric. + +Why a per-cpuset, running average: + + Because this meter is per-cpuset, rather than per-task or mm, + the system load imposed by a batch scheduler monitoring this + metric is sharply reduced on large systems, because a scan of + the tasklist can be avoided on each set of queries. 
+ + Because this meter is a running average, instead of an accumulating + counter, a batch scheduler can detect memory pressure with a + single read, instead of having to read and accumulate results + for a period of time. + + Because this meter is per-cpuset rather than per-task or mm, + the batch scheduler can obtain the key information, memory + pressure in a cpuset, with a single read, rather than having to + query and accumulate results over all the (dynamically changing) + set of tasks in the cpuset. + +A per-cpuset simple digital filter (requires a spinlock and 3 words +of data per-cpuset) is kept, and updated by any task attached to that +cpuset, if it enters the synchronous (direct) page reclaim code. + +A per-cpuset file provides an integer number representing the recent +(half-life of 10 seconds) rate of direct page reclaims caused by +the tasks in the cpuset, in units of reclaims attempted per second, +times 1000. + + +1.6 What is memory spread ? +--------------------------- +There are two boolean flag files per cpuset that control where the +kernel allocates pages for the file system buffers and related in +kernel data structures. They are called 'memory_spread_page' and +'memory_spread_slab'. + +If the per-cpuset boolean flag file 'memory_spread_page' is set, then +the kernel will spread the file system buffers (page cache) evenly +over all the nodes that the faulting task is allowed to use, instead +of preferring to put those pages on the node where the task is running. + +If the per-cpuset boolean flag file 'memory_spread_slab' is set, +then the kernel will spread some file system related slab caches, +such as for inodes and dentries evenly over all the nodes that the +faulting task is allowed to use, instead of preferring to put those +pages on the node where the task is running. + +The setting of these flags does not affect anonymous data segment or +stack segment pages of a task. 
+ +By default, both kinds of memory spreading are off, and memory +pages are allocated on the node local to where the task is running, +except perhaps as modified by the tasks NUMA mempolicy or cpuset +configuration, so long as sufficient free memory pages are available. + +When new cpusets are created, they inherit the memory spread settings +of their parent. + +Setting memory spreading causes allocations for the affected page +or slab caches to ignore the tasks NUMA mempolicy and be spread +instead. Tasks using mbind() or set_mempolicy() calls to set NUMA +mempolicies will not notice any change in these calls as a result of +their containing tasks memory spread settings. If memory spreading +is turned off, then the currently specified NUMA mempolicy once again +applies to memory page allocations. + +Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag +files. By default they contain "0", meaning that the feature is off +for that cpuset. If a "1" is written to that file, then that turns +the named feature on. + +The implementation is simple. + +Setting the flag 'memory_spread_page' turns on a per-process flag +PF_SPREAD_PAGE for each task that is in that cpuset or subsequently +joins that cpuset. The page allocation calls for the page cache +is modified to perform an inline check for this PF_SPREAD_PAGE task +flag, and if set, a call to a new routine cpuset_mem_spread_node() +returns the node to prefer for the allocation. + +Similarly, setting 'memory_spread_slab' turns on the flag +PF_SPREAD_SLAB, and appropriately marked slab caches will allocate +pages from the node returned by cpuset_mem_spread_node(). + +The cpuset_mem_spread_node() routine is also simple. It uses the +value of a per-task rotor cpuset_mem_spread_rotor to select the next +node in the current tasks mems_allowed to prefer for the allocation. + +This memory placement policy is also known (in other contexts) as +round-robin or interleave. 
+ +This policy can provide substantial improvements for jobs that need +to place thread local data on the corresponding node, but that need +to access large file system data sets that need to be spread across +the several nodes in the jobs cpuset in order to fit. Without this +policy, especially for jobs that might have one thread reading in the +data set, the memory allocation across the nodes in the jobs cpuset +can become very uneven. + +1.7 What is sched_load_balance ? +-------------------------------- + +The kernel scheduler (kernel/sched.c) automatically load balances +tasks. If one CPU is underutilized, kernel code running on that +CPU will look for tasks on other more overloaded CPUs and move those +tasks to itself, within the constraints of such placement mechanisms +as cpusets and sched_setaffinity. + +The algorithmic cost of load balancing and its impact on key shared +kernel data structures such as the task list increases more than +linearly with the number of CPUs being balanced. So the scheduler +has support to partition the systems CPUs into a number of sched +domains such that it only load balances within each sched domain. +Each sched domain covers some subset of the CPUs in the system; +no two sched domains overlap; some CPUs might not be in any sched +domain and hence won't be load balanced. + +Put simply, it costs less to balance between two smaller sched domains +than one big one, but doing so means that overloads in one of the +two domains won't be load balanced to the other one. + +By default, there is one sched domain covering all CPUs, except those +marked isolated using the kernel boot time "isolcpus=" argument. + +This default load balancing across all CPUs is not well suited for +the following two situations: + 1) On large systems, load balancing across many CPUs is expensive. + If the system is managed using cpusets to place independent jobs + on separate sets of CPUs, full load balancing is unnecessary. 
+ 2) Systems supporting realtime on some CPUs need to minimize + system overhead on those CPUs, including avoiding task load + balancing if that is not needed. + +When the per-cpuset flag "sched_load_balance" is enabled (the default +setting), it requests that all the CPUs in that cpusets allowed 'cpus' +be contained in a single sched domain, ensuring that load balancing +can move a task (not otherwise pinned, as by sched_setaffinity) +from any CPU in that cpuset to any other. + +When the per-cpuset flag "sched_load_balance" is disabled, then the +scheduler will avoid load balancing across the CPUs in that cpuset, +--except-- in so far as is necessary because some overlapping cpuset +has "sched_load_balance" enabled. + +So, for example, if the top cpuset has the flag "sched_load_balance" +enabled, then the scheduler will have one sched domain covering all +CPUs, and the setting of the "sched_load_balance" flag in any other +cpusets won't matter, as we're already fully load balancing. + +Therefore in the above two situations, the top cpuset flag +"sched_load_balance" should be disabled, and only some of the smaller, +child cpusets have this flag enabled. + +When doing this, you don't usually want to leave any unpinned tasks in +the top cpuset that might use non-trivial amounts of CPU, as such tasks +may be artificially constrained to some subset of CPUs, depending on +the particulars of this flag setting in descendent cpusets. Even if +such a task could use spare CPU cycles in some other CPUs, the kernel +scheduler might not consider the possibility of load balancing that +task to that underused CPU. + +Of course, tasks pinned to a particular CPU can be left in a cpuset +that disables "sched_load_balance" as those tasks aren't going anywhere +else anyway. + +There is an impedance mismatch here, between cpusets and sched domains. +Cpusets are hierarchical and nest. Sched domains are flat; they don't +overlap and each CPU is in at most one sched domain.
+ +It is necessary for sched domains to be flat because load balancing +across partially overlapping sets of CPUs would risk unstable dynamics +that would be beyond our understanding. So if each of two partially +overlapping cpusets enables the flag 'sched_load_balance', then we +form a single sched domain that is a superset of both. We won't move +a task to a CPU outside its cpuset, but the scheduler load balancing +code might waste some compute cycles considering that possibility. + +This mismatch is why there is not a simple one-to-one relation +between which cpusets have the flag "sched_load_balance" enabled, +and the sched domain configuration. If a cpuset enables the flag, it +will get balancing across all its CPUs, but if it disables the flag, +it will only be assured of no load balancing if no other overlapping +cpuset enables the flag. + +If two cpusets have partially overlapping 'cpus' allowed, and only +one of them has this flag enabled, then the other may find its +tasks only partially load balanced, just on the overlapping CPUs. +This is just the general case of the top_cpuset example given a few +paragraphs above. In the general case, as in the top cpuset case, +don't leave tasks that might use non-trivial amounts of CPU in +such partially load balanced cpusets, as they may be artificially +constrained to some subset of the CPUs allowed to them, for lack of +load balancing to the other CPUs. + +1.7.1 sched_load_balance implementation details. +------------------------------------------------ + +The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary +to most cpuset flags.) When enabled for a cpuset, the kernel will +ensure that it can load balance across all the CPUs in that cpuset +(makes sure that all the CPUs in the cpus_allowed of that cpuset are +in the same sched domain.) + +If two overlapping cpusets both have 'sched_load_balance' enabled, +then they will be (must be) both in the same sched domain.
+ +If, as is the default, the top cpuset has 'sched_load_balance' enabled, +then by the above that means there is a single sched domain covering +the whole system, regardless of any other cpuset settings. + +The kernel commits to user space that it will avoid load balancing +where it can. It will pick as fine a granularity partition of sched +domains as it can while still providing load balancing for any set +of CPUs allowed to a cpuset having 'sched_load_balance' enabled. + +The internal kernel cpuset to scheduler interface passes from the +cpuset code to the scheduler code a partition of the load balanced +CPUs in the system. This partition is a set of subsets (represented +as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all +the CPUs that must be load balanced. + +Whenever the 'sched_load_balance' flag changes, or CPUs come or go +from a cpuset with this flag enabled, or a cpuset with this flag +enabled is removed, the cpuset code builds a new such partition and +passes it to the scheduler sched domain setup code, to have the sched +domains rebuilt as necessary. + +This partition exactly defines what sched domains the scheduler should +setup - one sched domain for each element (cpumask_t) in the partition. + +The scheduler remembers the currently active sched domain partitions. +When the scheduler routine partition_sched_domains() is invoked from +the cpuset code to update these sched domains, it compares the new +partition requested with the current, and updates its sched domains, +removing the old and adding the new, for each change. + + +1.8 What is sched_relax_domain_level ? +-------------------------------------- + +In sched domain, the scheduler migrates tasks in 2 ways; periodic load +balance on tick, and at time of some schedule events. + +When a task is woken up, scheduler try to move the task on idle CPU. 
+For example, if a task A running on CPU X activates another task B +on the same CPU X, and if CPU Y is X's sibling and is idle, +then the scheduler migrates task B to CPU Y so that task B can start on +CPU Y without waiting for task A on CPU X. + +And if a CPU runs out of tasks in its runqueue, the CPU tries to pull +extra tasks from other busy CPUs to help them before it goes +idle. + +Of course, since it takes some searching cost to find movable tasks and/or +idle CPUs, the scheduler might not search all CPUs in the domain +every time. In fact, in some architectures, the searching ranges on +events are limited to the same socket or node where the CPU is located, +while the load balance on tick searches all. + +For example, assume CPU Z is relatively far from CPU X. Even if CPU Z +is idle while CPU X and the siblings are busy, the scheduler can't migrate +woken task B from X to Z since it is out of its searching range. +As a result, task B on CPU X needs to wait for task A or wait for load balance +on the next tick. For some applications in special situations, waiting +1 tick may be too long. + +The 'sched_relax_domain_level' file allows you to request changing +this searching range as you like. This file takes an int value which +indicates the size of the searching range in levels, ideally as follows, +or else the initial value -1, which indicates that the cpuset has no request. + + -1 : no request. use system default or follow request of others. + 0 : no search. + 1 : search siblings (hyperthreads in a core). + 2 : search cores in a package. + 3 : search cpus in a node [= system wide on non-NUMA system] + ( 4 : search nodes in a chunk of node [on NUMA system] ) + ( 5 : search system wide [on NUMA system] ) + +The system default is architecture dependent. The system default +can be changed using the relax_domain_level= boot parameter. + +This file is per-cpuset and affects the sched domain that the cpuset +belongs to.
Therefore if the flag 'sched_load_balance' of a cpuset +is disabled, then 'sched_relax_domain_level' has no effect since +there is no sched domain belonging to the cpuset. + +If multiple cpusets are overlapping and hence they form a single sched +domain, the largest value among those is used. Be careful, if one +requests 0 and others are -1 then 0 is used. + +Note that modifying this file will have both good and bad effects, +and whether it is acceptable or not will depend on your situation. +Don't modify this file if you are not sure. + +If your situation is: + - The migration costs between each cpu can be assumed considerably + small(for you) due to your special application's behavior or + special hardware support for CPU cache etc. + - The searching cost doesn't have impact(for you) or you can make + the searching cost small enough by managing cpuset to compact etc. + - The latency is required even if it sacrifices cache hit rate etc. +then increasing 'sched_relax_domain_level' would benefit you. + + +1.9 How do I use cpusets ? +-------------------------- + +In order to minimize the impact of cpusets on critical kernel +code, such as the scheduler, and due to the fact that the kernel +does not support one task updating the memory placement of another +task directly, the impact on a task of changing its cpuset CPU +or Memory Node placement, or of changing to which cpuset a task +is attached, is subtle. + +If a cpuset has its Memory Nodes modified, then for each task attached +to that cpuset, the next time that the kernel attempts to allocate +a page of memory for that task, the kernel will notice the change +in the task's cpuset, and update its per-task memory placement to +remain within the new cpuset's memory placement. If the task was using +mempolicy MPOL_BIND, and the nodes to which it was bound overlap with +its new cpuset, then the task will continue to use whatever subset +of MPOL_BIND nodes are still allowed in the new cpuset.
If the task +was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed +in the new cpuset, then the task will be essentially treated as if it +was MPOL_BIND bound to the new cpuset (even though its numa placement, +as queried by get_mempolicy(), doesn't change). If a task is moved +from one cpuset to another, then the kernel will adjust the task's +memory placement, as above, the next time that the kernel attempts +to allocate a page of memory for that task. + +If a cpuset has its 'cpus' modified, then each task in that cpuset +will have its allowed CPU placement changed immediately. Similarly, +if a task's pid is written to a cpuset's 'tasks' file, in either its +current cpuset or another cpuset, then its allowed CPU placement is +changed immediately. If such a task had been bound to some subset +of its cpuset using the sched_setaffinity() call, the task will be +allowed to run on any CPU allowed in its new cpuset, negating the +effect of the prior sched_setaffinity() call. + +In summary, the memory placement of a task whose cpuset is changed is +updated by the kernel, on the next allocation of a page for that task, +but the processor placement is not updated, until that task's pid is +rewritten to the 'tasks' file of its cpuset. This is done to avoid +impacting the scheduler code in the kernel with a check for changes +in a task's processor placement. + +Normally, once a page is allocated (given a physical page +of main memory) then that page stays on whatever node it +was allocated, so long as it remains allocated, even if the +cpuset's memory placement policy 'mems' subsequently changes. +If the cpuset flag file 'memory_migrate' is set true, then when +tasks are attached to that cpuset, any pages that task had +allocated to it on nodes in its previous cpuset are migrated +to the task's new cpuset. The relative placement of the page within +the cpuset is preserved during these migration operations if possible.
+For example if the page was on the second valid node of the prior cpuset +then the page will be placed on the second valid node of the new cpuset. + +Also if 'memory_migrate' is set true, then if that cpusets +'mems' file is modified, pages allocated to tasks in that +cpuset, that were on nodes in the previous setting of 'mems', +will be moved to nodes in the new setting of 'mems.' +Pages that were not in the tasks prior cpuset, or in the cpusets +prior 'mems' setting, will not be moved. + +There is an exception to the above. If hotplug functionality is used +to remove all the CPUs that are currently assigned to a cpuset, +then all the tasks in that cpuset will be moved to the nearest ancestor +with non-empty cpus. But the moving of some (or all) tasks might fail if +cpuset is bound with another cgroup subsystem which has some restrictions +on task attaching. In this failing case, those tasks will stay +in the original cpuset, and the kernel will automatically update +their cpus_allowed to allow all online CPUs. When memory hotplug +functionality for removing Memory Nodes is available, a similar exception +is expected to apply there as well. In general, the kernel prefers to +violate cpuset placement, over starving a task that has had all +its allowed CPUs or Memory Nodes taken offline. + +There is a second exception to the above. GFP_ATOMIC requests are +kernel internal allocations that must be satisfied, immediately. +The kernel may drop some request, in rare cases even panic, if a +GFP_ATOMIC alloc fails. If the request cannot be satisfied within +the current tasks cpuset, then we relax the cpuset, and look for +memory anywhere we can find it. It's better to violate the cpuset +than stress the kernel. + +To start a new job that is to be contained within a cpuset, the steps are: + + 1) mkdir /dev/cpuset + 2) mount -t cgroup -ocpuset cpuset /dev/cpuset + 3) Create the new cpuset by doing mkdir's and write's (or echo's) in + the /dev/cpuset virtual file system. 
+ 4) Start a task that will be the "founding father" of the new job. + 5) Attach that task to the new cpuset by writing its pid to the + /dev/cpuset tasks file for that cpuset. + 6) fork, exec or clone the job tasks from this founding father task. + +For example, the following sequence of commands will setup a cpuset +named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, +and then start a subshell 'sh' in that cpuset: + + mount -t cgroup -ocpuset cpuset /dev/cpuset + cd /dev/cpuset + mkdir Charlie + cd Charlie + /bin/echo 2-3 > cpus + /bin/echo 1 > mems + /bin/echo $$ > tasks + sh + # The subshell 'sh' is now running in cpuset Charlie + # The next line should display '/Charlie' + cat /proc/self/cpuset + +In the future, a C library interface to cpusets will likely be +available. For now, the only way to query or modify cpusets is +via the cpuset file system, using the various cd, mkdir, echo, cat, +rmdir commands from the shell, or their equivalent from C. + +The sched_setaffinity calls can also be done at the shell prompt using +SGI's runon or Robert Love's taskset. The mbind and set_mempolicy +calls can be done at the shell prompt using the numactl command +(part of Andi Kleen's numa package). + +2. Usage Examples and Syntax +============================ + +2.1 Basic Usage +--------------- + +Creating, modifying, using the cpusets can be done through the cpuset +virtual filesystem. + +To mount it, type: +# mount -t cgroup -o cpuset cpuset /dev/cpuset + +Then under /dev/cpuset you can find a tree that corresponds to the +tree of the cpusets in the system. For instance, /dev/cpuset +is the cpuset that holds the whole system. + +If you want to create a new cpuset under /dev/cpuset: +# cd /dev/cpuset +# mkdir my_cpuset + +Now you want to do something with this cpuset. 
+# cd my_cpuset + +In this directory you can find several files: +# ls +cpu_exclusive memory_migrate mems tasks +cpus memory_pressure notify_on_release +mem_exclusive memory_spread_page sched_load_balance +mem_hardwall memory_spread_slab sched_relax_domain_level + +Reading them will give you information about the state of this cpuset: +the CPUs and Memory Nodes it can use, the processes that are using +it, its properties. By writing to these files you can manipulate +the cpuset. + +Set some flags: +# /bin/echo 1 > cpu_exclusive + +Add some cpus: +# /bin/echo 0-7 > cpus + +Add some mems: +# /bin/echo 0-7 > mems + +Now attach your shell to this cpuset: +# /bin/echo $$ > tasks + +You can also create cpusets inside your cpuset by using mkdir in this +directory. +# mkdir my_sub_cs + +To remove a cpuset, just use rmdir: +# rmdir my_sub_cs +This will fail if the cpuset is in use (has cpusets inside, or has +processes attached). + +Note that for legacy reasons, the "cpuset" filesystem exists as a +wrapper around the cgroup filesystem. + +The command + +mount -t cpuset X /dev/cpuset + +is equivalent to + +mount -t cgroup -ocpuset X /dev/cpuset +echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent + +2.2 Adding/removing cpus +------------------------ + +This is the syntax to use when writing in the cpus or mems files +in cpuset directories: + +# /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 +# /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 + +2.3 Setting flags +----------------- + +The syntax is very simple: + +# /bin/echo 1 > cpu_exclusive -> set flag 'cpu_exclusive' +# /bin/echo 0 > cpu_exclusive -> unset flag 'cpu_exclusive' + +2.4 Attaching processes +----------------------- + +# /bin/echo PID > tasks + +Note that it is PID, not PIDs. You can only attach ONE task at a time. +If you have several tasks to attach, you have to do it one after another: + +# /bin/echo PID1 > tasks +# /bin/echo PID2 > tasks + ... +# /bin/echo PIDn > tasks + + +3. 
Questions +============ + +Q: what's up with this '/bin/echo' ? +A: bash's builtin 'echo' command does not check calls to write() against + errors. If you use it in the cpuset file system, you won't be + able to tell whether a command succeeded or failed. + +Q: When I attach processes, only the first of the line gets really attached ! +A: We can only return one error code per call to write(). So you should also + put only ONE pid. + +4. Contact +========== + +Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt new file mode 100644 index 00000000000..7cc6e6a6067 --- /dev/null +++ b/Documentation/cgroups/devices.txt @@ -0,0 +1,52 @@ +Device Whitelist Controller + +1. Description: + +Implement a cgroup to track and enforce open and mknod restrictions +on device files. A device cgroup associates a device access +whitelist with each cgroup. A whitelist entry has 4 fields. +'type' is a (all), c (char), or b (block). 'all' means it applies +to all types and all major and minor numbers. Major and minor are +either an integer or * for all. Access is a composition of r +(read), w (write), and m (mknod). + +The root device cgroup starts with rwm to 'all'. A child device +cgroup gets a copy of the parent. Administrators can then remove +devices from the whitelist or add new entries. A child cgroup can +never receive a device access which is denied by its parent. However +when a device access is removed from a parent it will not also be +removed from the child(ren). + +2. User Interface + +An entry is added using devices.allow, and removed using +devices.deny. For instance + + echo 'c 1:3 mr' > /cgroups/1/devices.allow + +allows cgroup 1 to read and mknod the device usually known as +/dev/null. Doing + + echo a > /cgroups/1/devices.deny + +will remove the default 'a *:* rwm' entry. Doing + + echo a > /cgroups/1/devices.allow + +will add the 'a *:* rwm' entry to the whitelist. + +3. 
Security + +Any task can move itself between cgroups. This clearly won't +suffice, but we can decide the best way to adequately restrict +movement as people get some experience with this. We may just want +to require CAP_SYS_ADMIN, which at least is a separate bit from +CAP_MKNOD. We may want to just refuse moving to a cgroup which +isn't a descendent of the current one. Or we may want to use +CAP_MAC_ADMIN, since we really are trying to lock down root. + +CAP_SYS_ADMIN is needed to modify the whitelist or move another +task to a new cgroup. (Again we'll probably want to change that). + +A cgroup may not be granted more permissions than the cgroup's +parent has. diff --git a/Documentation/cgroups/memcg_test.txt b/Documentation/cgroups/memcg_test.txt new file mode 100644 index 00000000000..19533f93b7a --- /dev/null +++ b/Documentation/cgroups/memcg_test.txt @@ -0,0 +1,342 @@ +Memory Resource Controller(Memcg) Implementation Memo. +Last Updated: 2008/12/15 +Base Kernel Version: based on 2.6.28-rc8-mm. + +Because VM is getting complex (one of reasons is memcg...), memcg's behavior +is complex. This is a document for memcg's internal behavior. +Please note that implementation details can be changed. + +(*) Topics on API should be in Documentation/cgroups/memory.txt) + +0. How to record usage ? + 2 objects are used. + + page_cgroup ....an object per page. + Allocated at boot or memory hotplug. Freed at memory hot removal. + + swap_cgroup ... an entry per swp_entry. + Allocated at swapon(). Freed at swapoff(). + + The page_cgroup has USED bit and double count against a page_cgroup never + occurs. swap_cgroup is used only when a charged page is swapped-out. + +1. Charge + + a page/swp_entry may be charged (usage += PAGE_SIZE) at + + mem_cgroup_newpage_charge() + Called at new page fault and Copy-On-Write. + + mem_cgroup_try_charge_swapin() + Called at do_swap_page() (page fault on swap entry) and swapoff. + Followed by charge-commit-cancel protocol. 
(With swap accounting) + At commit, a charge recorded in swap_cgroup is removed. + + mem_cgroup_cache_charge() + Called at add_to_page_cache() + + mem_cgroup_cache_charge_swapin() + Called at shmem's swapin. + + mem_cgroup_prepare_migration() + Called before migration. "extra" charge is done and followed by + charge-commit-cancel protocol. + At commit, charge against oldpage or newpage will be committed. + +2. Uncharge + a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by + + mem_cgroup_uncharge_page() + Called when an anonymous page is fully unmapped. I.e., mapcount goes + to 0. If the page is SwapCache, uncharge is delayed until + mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_cache_page() + Called when a page-cache is deleted from radix-tree. If the page is + SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). + + mem_cgroup_uncharge_swapcache() + Called when SwapCache is removed from radix-tree. The charge itself + is moved to swap_cgroup. (If mem+swap controller is disabled, no + charge to swap occurs.) + + mem_cgroup_uncharge_swap() + Called when swp_entry's refcnt goes down to 0. A charge against swap + disappears. + + mem_cgroup_end_migration(old, new) + At success of migration old is uncharged (if necessary), a charge + to new page is committed. At failure, charge to old page is committed. + +3. charge-commit-cancel + In some case, we can't know this "charge" is valid or not at charging + (because of races). + To handle such case, there are charge-commit-cancel functions. + mem_cgroup_try_charge_XXX + mem_cgroup_commit_charge_XXX + mem_cgroup_cancel_charge_XXX + these are used in swap-in and migration. + + At try_charge(), there are no flags to say "this page is charged". + at this point, usage += PAGE_SIZE. + + At commit(), the function checks the page should be charged or not + and set flags or avoid charging.(usage -= PAGE_SIZE) + + At cancel(), simply usage -= PAGE_SIZE. 
+ +Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. + +4. Anonymous + Anonymous page is newly allocated at + - page fault into MAP_ANONYMOUS mapping. + - Copy-On-Write. + It is charged right after it's allocated before doing any page table + related operations. Of course, it's uncharged when another page is used + for the fault address. + + At freeing anonymous page (by exit() or munmap()), zap_pte() is called + and pages for ptes are freed one by one.(see mm/memory.c). Uncharges + are done at page_remove_rmap() when page_mapcount() goes down to 0. + + Another page freeing is by page-reclaim (vmscan.c) and anonymous + pages are swapped out. In this case, the page is marked as + PageSwapCache(). uncharge() routine doesn't uncharge the page marked + as SwapCache(). It's delayed until __delete_from_swap_cache(). + + 4.1 Swap-in. + At swap-in, the page is taken from swap-cache. There are 2 cases. + + (a) If the SwapCache is newly allocated and read, it has no charges. + (b) If the SwapCache has been mapped by processes, it has been + charged already. + + This swap-in is one of the most complicated work. In do_swap_page(), + following events occur when pte is unchanged. + + (1) the page (SwapCache) is looked up. + (2) lock_page() + (3) try_charge_swapin() + (4) reuse_swap_page() (may call delete_swap_cache()) + (5) commit_charge_swapin() + (6) swap_free(). + + Considering following situation for example. + + (A) The page has not been charged before (2) and reuse_swap_page() + doesn't call delete_from_swap_cache(). + (B) The page has not been charged before (2) and reuse_swap_page() + calls delete_from_swap_cache(). + (C) The page has been charged before (2) and reuse_swap_page() doesn't + call delete_from_swap_cache(). + (D) The page has been charged before (2) and reuse_swap_page() calls + delete_from_swap_cache(). 
+ + memory.usage/memsw.usage changes to this page/swp_entry will be + Case (A) (B) (C) (D) + Event + Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 + =========================================== + (3) +1/+1 +1/+1 +1/+1 +1/+1 + (4) - 0/ 0 - -1/ 0 + (5) 0/-1 0/ 0 -1/-1 0/ 0 + (6) - 0/-1 - 0/-1 + =========================================== + Result 1/ 1 1/ 1 1/ 1 1/ 1 + + In any cases, charges to this page should be 1/ 1. + + 4.2 Swap-out. + At swap-out, typical state transition is below. + + (a) add to swap cache. (marked as SwapCache) + swp_entry's refcnt += 1. + (b) fully unmapped. + swp_entry's refcnt += # of ptes. + (c) write back to swap. + (d) delete from swap cache. (remove from SwapCache) + swp_entry's refcnt -= 1. + + + At (b), the page is marked as SwapCache and not uncharged. + At (d), the page is removed from SwapCache and a charge in page_cgroup + is moved to swap_cgroup. + + Finally, at task exit, + (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. + Here, a charge in swap_cgroup disappears. + +5. Page Cache + Page Cache is charged at + - add_to_page_cache_locked(). + + uncharged at + - __remove_from_page_cache(). + + The logic is very clear. (About migration, see below) + Note: __remove_from_page_cache() is called by remove_from_page_cache() + and __remove_mapping(). + +6. Shmem(tmpfs) Page Cache + Memcg's charge/uncharge have special handlers of shmem. The best way + to understand shmem's page state transition is to read mm/shmem.c. + But brief explanation of the behavior of memcg around shmem will be + helpful to understand the logic. + + Shmem's page (just leaf page, not direct/indirect block) can be on + - radix-tree of shmem's inode. + - SwapCache. + - Both on radix-tree and SwapCache. This happens at swap-in + and swap-out, + + It's charged when... + - A new page is added to shmem's radix-tree. + - A swp page is read. (move a charge from swap_cgroup to page_cgroup) + It's uncharged when + - A page is removed from radix-tree and not SwapCache. 
+ - When SwapCache is removed, a charge is moved to swap_cgroup. + - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup + disappears. + +7. Page Migration + One of the most complicated functions is page-migration-handler. + Memcg has 2 routines. Assume that we are migrating a page's contents + from OLDPAGE to NEWPAGE. + + Usual migration logic is.. + (a) remove the page from LRU. + (b) allocate NEWPAGE (migration target) + (c) lock by lock_page(). + (d) unmap all mappings. + (e-1) If necessary, replace entry in radix-tree. + (e-2) move contents of a page. + (f) map all mappings again. + (g) pushback the page to LRU. + (-) OLDPAGE will be freed. + + Before (g), memcg should complete all necessary charge/uncharge to + NEWPAGE/OLDPAGE. + + The point is.... + - If OLDPAGE is anonymous, all charges will be dropped at (d) because + try_to_unmap() drops all mapcount and the page will not be + SwapCache. + + - If OLDPAGE is SwapCache, charges will be kept at (g) because + __delete_from_swap_cache() isn't called at (e-1) + + - If OLDPAGE is page-cache, charges will be kept at (g) because + remove_from_swap_cache() isn't called at (e-1) + + memcg provides following hooks. + + - mem_cgroup_prepare_migration(OLDPAGE) + Called after (b) to account a charge (usage += PAGE_SIZE) against + memcg which OLDPAGE belongs to. + + - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) + Called after (f) before (g). + If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already + charged, a charge by prepare_migration() is automatically canceled. + If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. + + But zap_pte() (by exit or munmap) can be called while migration, + we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). + +8. LRU + Each memcg has its own private LRU. Now, it's handling is under global + VM's control (means that it's handled under global zone->lru_lock). 
+ Almost all routines around memcg's LRU is called by global LRU's + list management functions under zone->lru_lock(). + + A special function is mem_cgroup_isolate_pages(). This scans + memcg's private LRU and call __isolate_lru_page() to extract a page + from LRU. + (By __isolate_lru_page(), the page is removed from both of global and + private LRU.) + + +9. Typical Tests. + + Tests for racy cases. + + 9.1 Small limit to memcg. + When you do test to do racy case, it's good test to set memcg's limit + to be very small rather than GB. Many races found in the test under + xKB or xxMB limits. + (Memory behavior under GB and Memory behavior under MB shows very + different situation.) + + 9.2 Shmem + Historically, memcg's shmem handling was poor and we saw some amount + of troubles here. This is because shmem is page-cache but can be + SwapCache. Test with shmem/tmpfs is always good test. + + 9.3 Migration + For NUMA, migration is an another special case. To do easy test, cpuset + is useful. Following is a sample script to do migration. + + mount -t cgroup -o cpuset none /opt/cpuset + + mkdir /opt/cpuset/01 + echo 1 > /opt/cpuset/01/cpuset.cpus + echo 0 > /opt/cpuset/01/cpuset.mems + echo 1 > /opt/cpuset/01/cpuset.memory_migrate + mkdir /opt/cpuset/02 + echo 1 > /opt/cpuset/02/cpuset.cpus + echo 1 > /opt/cpuset/02/cpuset.mems + echo 1 > /opt/cpuset/02/cpuset.memory_migrate + + In above set, when you moves a task from 01 to 02, page migration to + node 0 to node 1 will occur. Following is a script to migrate all + under cpuset. + -- + move_task() + { + for pid in $1 + do + /bin/echo $pid >$2/tasks 2>/dev/null + echo -n $pid + echo -n " " + done + echo END + } + + G1_TASK=`cat ${G1}/tasks` + G2_TASK=`cat ${G2}/tasks` + move_task "${G1_TASK}" ${G2} & + -- + 9.4 Memory hotplug. + memory hotplug test is one of good test. + to offline memory, do following. 
+ # echo offline > /sys/devices/system/memory/memoryXXX/state + (XXX is the place of memory) + This is an easy way to test page migration, too. + + 9.5 mkdir/rmdir + When using hierarchy, mkdir/rmdir test should be done. + Use tests like the following. + + echo 1 >/opt/cgroup/01/memory.use_hierarchy + mkdir /opt/cgroup/01/child_a + mkdir /opt/cgroup/01/child_b + + set limit to 01. + add limit to 01/child_b + run jobs under child_a and child_b + + create/delete following groups at random while jobs are running. + /opt/cgroup/01/child_a/child_aa + /opt/cgroup/01/child_b/child_bb + /opt/cgroup/01/child_c + + running new jobs in new group is also good. + + 9.6 Mount with other subsystems. + Mounting with other subsystems is a good test because there is a + race and lock dependency with other cgroup subsystems. + + example) + # mount -t cgroup none /cgroup -o cpuset,memory,cpu,devices + + and do task move, mkdir, rmdir etc...under this. diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt new file mode 100644 index 00000000000..e1501964df1 --- /dev/null +++ b/Documentation/cgroups/memory.txt @@ -0,0 +1,399 @@ +Memory Resource Controller + +NOTE: The Memory Resource Controller has generically been referred +to as the memory controller in this document. Do not confuse memory controller +used here with the memory controller that is used in hardware. + +Salient features + +a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages +b. The infrastructure allows easy addition of other types of memory to control +c. Provides *zero overhead* for non memory controller users +d. Provides a double LRU: global memory pressure causes reclaim from the + global LRU; a cgroup on hitting a limit, reclaims from the per + cgroup LRU + +NOTE: Swap Cache (unmapped) is not accounted now. + +Benefits and Purpose of the memory controller + +The memory controller isolates the memory behaviour of a group of tasks +from the rest of the system.
The article on LWN [12] mentions some probable +uses of the memory controller. The memory controller can be used to + +a. Isolate an application or a group of applications + Memory hungry applications can be isolated and limited to a smaller + amount of memory. +b. Create a cgroup with limited amount of memory, this can be used + as a good alternative to booting with mem=XXXX. +c. Virtualization solutions can control the amount of memory they want + to assign to a virtual machine instance. +d. A CD/DVD burner could control the amount of memory used by the + rest of the system to ensure that burning does not fail due to lack + of available memory. +e. There are several other use cases, find one or use the controller just + for fun (to learn and hack on the VM subsystem). + +1. History + +The memory controller has a long history. A request for comments for the memory +controller was posted by Balbir Singh [1]. At the time the RFC was posted +there were several implementations for memory control. The goal of the +RFC was to build consensus and agreement for the minimal features required +for memory control. The first RSS controller was posted by Balbir Singh[2] +in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the +RSS controller. At OLS, at the resource management BoF, everyone suggested +that we handle both page cache and RSS together. Another request was raised +to allow user space handling of OOM. The current memory controller is +at version 6; it combines both mapped (RSS) and unmapped Page +Cache Control [11]. + +2. Memory Control + +Memory is a unique resource in the sense that it is present in a limited +amount. If a task requires a lot of CPU processing, the task can spread +its processing over a period of hours, days, months or years, but with +memory, the same physical memory needs to be reused to accomplish the task. + +The memory controller implementation has been divided into phases. These +are: + +1. Memory controller +2. 
mlock(2) controller +3. Kernel user memory accounting and slab control +4. user mappings length controller + +The memory controller is the first controller developed. + +2.1. Design + +The core of the design is a counter called the res_counter. The res_counter +tracks the current memory usage and limit of the group of processes associated +with the controller. Each cgroup has a memory controller specific data +structure (mem_cgroup) associated with it. + +2.2. Accounting + + +--------------------+ + | mem_cgroup | + | (res_counter) | + +--------------------+ + / ^ \ + / | \ + +---------------+ | +---------------+ + | mm_struct | |.... | mm_struct | + | | | | | + +---------------+ | +---------------+ + | + + --------------+ + | + +---------------+ +------+--------+ + | page +----------> page_cgroup| + | | | | + +---------------+ +---------------+ + + (Figure 1: Hierarchy of Accounting) + + +Figure 1 shows the important aspects of the controller + +1. Accounting happens per cgroup +2. Each mm_struct knows about which cgroup it belongs to +3. Each page has a pointer to the page_cgroup, which in turn knows the + cgroup it belongs to + +The accounting is done as follows: mem_cgroup_charge() is invoked to setup +the necessary data structures and check if the cgroup that is being charged +is over its limit. If it is then reclaim is invoked on the cgroup. +More details can be found in the reclaim section of this document. +If everything goes well, a page meta-data-structure called page_cgroup is +allocated and associated with the page. This routine also adds the page to +the per cgroup LRU. + +2.2.1 Accounting details + +All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. +(some pages which never be reclaimable and will not be on global LRU + are not accounted. we just accounts pages under usual vm management.) + +RSS pages are accounted at page_fault unless they've already been accounted +for earlier. 
A file page will be accounted for as Page Cache when it's +inserted into inode (radix-tree). While it's mapped into the page tables of +processes, duplicate accounting is carefully avoided. + +A RSS page is unaccounted when it's fully unmapped. A PageCache page is +unaccounted when it's removed from radix-tree. + +At page migration, accounting information is kept. + +Note: we just account pages-on-lru because our purpose is to control amount +of used pages. not-on-lru pages tend to be out-of-control from vm view. + +2.3 Shared Page Accounting + +Shared pages are accounted on the basis of the first touch approach. The +cgroup that first touches a page is accounted for the page. The principle +behind this approach is that a cgroup that aggressively uses a shared +page will eventually get charged for it (once it is uncharged from +the cgroup that brought it in -- this will happen on memory pressure). + +Exception: If CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used.. +When you do swapoff and make swapped-out pages of shmem(tmpfs) to +be backed into memory in force, charges for pages are accounted against the +caller of swapoff rather than the users of shmem. + + +2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +Swap Extension allows you to record charge for swap. A swapped-in page is +charged back to original page allocator if possible. + +When swap is accounted, following files are added. + - memory.memsw.usage_in_bytes. + - memory.memsw.limit_in_bytes. + +usage of mem+swap is limited by memsw.limit_in_bytes. + +Note: why 'mem+swap' rather than swap. +The global LRU(kswapd) can swap out arbitrary pages. Swap-out means +to move account from memory to swap...there is no change in usage of +mem+swap. + +In other words, when we want to limit the usage of swap without affecting +global LRU, mem+swap limit is better than just limiting swap from OS point +of view.
+ +2.5 Reclaim + +Each cgroup maintains a per cgroup LRU that consists of an active +and inactive list. When a cgroup goes over its limit, we first try +to reclaim memory from the cgroup so as to make space for the new +pages that the cgroup has touched. If the reclaim is unsuccessful, +an OOM routine is invoked to select and kill the bulkiest task in the +cgroup. + +The reclaim algorithm has not been modified for cgroups, except that +pages that are selected for reclaiming come from the per cgroup LRU +list. + +2. Locking + +The memory controller uses the following hierarchy + +1. zone->lru_lock is used for selecting pages to be isolated +2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) +3. lock_page_cgroup() is used to protect page->page_cgroup + +3. User Interface + +0. Configuration + +a. Enable CONFIG_CGROUPS +b. Enable CONFIG_RESOURCE_COUNTERS +c. Enable CONFIG_CGROUP_MEM_RES_CTLR + +1. Prepare the cgroups +# mkdir -p /cgroups +# mount -t cgroup none /cgroups -o memory + +2. Make the new group and move bash into it +# mkdir /cgroups/0 +# echo $$ > /cgroups/0/tasks + +Since now we're in the 0 cgroup, +We can alter the memory limit: +# echo 4M > /cgroups/0/memory.limit_in_bytes + +NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, +mega or gigabytes. + +# cat /cgroups/0/memory.limit_in_bytes +4194304 + +NOTE: The interface has now changed to display the usage in bytes +instead of pages + +We can check the usage: +# cat /cgroups/0/memory.usage_in_bytes +1216512 + +A successful write to this file does not guarantee a successful set of +this limit to the value written into the file. This can be due to a +number of factors, such as rounding up to page boundaries or the total +availability of memory on the system. The user is required to re-read +this file after a write to guarantee the value committed by the kernel. 
+ +# echo 1 > memory.limit_in_bytes +# cat memory.limit_in_bytes +4096 + +The memory.failcnt field gives the number of times that the cgroup limit was +exceeded. + +The memory.stat file gives accounting information. Now, the number of +caches, RSS and Active pages/Inactive pages are shown. + +4. Testing + +Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. +Apart from that v6 has been tested with several applications and regular +daily use. The controller has also been tested on the PPC64, x86_64 and +UML platforms. + +4.1 Troubleshooting + +Sometimes a user might find that the application under a cgroup is +terminated. There are several causes for this: + +1. The cgroup limit is too low (just too low to do anything useful) +2. The user is using anonymous memory and swap is turned off or too low + +A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of +some of the pages cached in the cgroup (page cache pages). + +4.2 Task migration + +When a task migrates from one cgroup to another, its charge is not +carried forward. The pages allocated from the original cgroup still +remain charged to it, the charge is dropped when the page is freed or +reclaimed. + +4.3 Removing a cgroup + +A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a +cgroup might have some charge associated with it, even though all +tasks have migrated away from it. +Such charges are freed(at default) or moved to its parent. When moved, +both of RSS and CACHES are moved to parent. +If both of them are busy, rmdir() returns -EBUSY. See 5.1 Also. + +Charges recorded in swap information are not updated at removal of cgroup. +Recorded information is discarded and a cgroup which uses swap (swapcache) +will be charged as a new owner of it. + + +5. Misc. interfaces. + +5.1 force_empty + memory.force_empty interface is provided to make cgroup's memory usage empty. + You can use this interface only when the cgroup has no tasks.
+ When writing anything to this + + # echo 0 > memory.force_empty + + Almost all pages tracked by this memcg will be unmapped and freed. Some + pages cannot be freed because they are locked or in use. Such pages are moved + to parent and this cgroup will be empty. But this may return -EBUSY in + some too busy case. + + Typical use case of this interface is that calling this before rmdir(). + Because rmdir() moves all pages to parent, some out-of-use page caches can be + moved to the parent. If you want to avoid that, force_empty will be useful. + +5.2 stat file + memory.stat file includes following statistics (now) + cache - # of pages from page-cache and shmem. + rss - # of pages from anonymous memory. + pgpgin - # of event of charging + pgpgout - # of event of uncharging + active_anon - # of pages on active lru of anon, shmem. + inactive_anon - # of pages on inactive lru of anon, shmem + active_file - # of pages on active lru of file-cache + inactive_file - # of pages on inactive lru of file cache + unevictable - # of pages cannot be reclaimed.(mlocked etc) + + The following depend on CONFIG_DEBUG_VM. + inactive_ratio - VM internal parameter. (see mm/page_alloc.c) + recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) + recent_rotated_file - VM internal parameter. (see mm/vmscan.c) + recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) + recent_scanned_file - VM internal parameter. (see mm/vmscan.c) + + Memo: + recent_rotated means recent frequency of lru rotation. + recent_scanned means recent # of scans to lru. + showing for better debug please see the code for meanings. + + +5.3 swappiness + Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. + + Following cgroup's swappiness can't be changed. + - root cgroup (uses /proc/sys/vm/swappiness). + - a cgroup which uses hierarchy and it has child cgroup. + - a cgroup which uses hierarchy and not the root of hierarchy. + + +6.
Hierarchy support + +The memory controller supports a deep hierarchy and hierarchical accounting. +The hierarchy is created by creating the appropriate cgroups in the +cgroup filesystem. Consider for example, the following cgroup filesystem +hierarchy + + root + / | \ + / | \ + a b c + | \ + | \ + d e + +In the diagram above, with hierarchical accounting enabled, all memory +usage of e, is accounted to its ancestors up until the root (i.e, c and root), +that has memory.use_hierarchy enabled. If one of the ancestors goes over its +limit, the reclaim algorithm reclaims from the tasks in the ancestor and the +children of the ancestor. + +6.1 Enabling hierarchical accounting and reclaim + +The memory controller by default disables the hierarchy feature. Support +can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup + +# echo 1 > memory.use_hierarchy + +The feature can be disabled by + +# echo 0 > memory.use_hierarchy + +NOTE1: Enabling/disabling will fail if the cgroup already has other +cgroups created below it. + +NOTE2: This feature can be enabled/disabled per subtree. + +7. TODO + +1. Add support for accounting huge pages (as a separate controller) +2. Make per-cgroup scanner reclaim not-shared pages first +3. Teach controller to account for shared-pages +4. Start reclamation in the background when the limit is + not yet hit but the usage is getting closer + +Summary + +Overall, the memory controller has been a stable controller and has been +commented and discussed quite extensively in the community. + +References + +1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ +2. Singh, Balbir. Memory Controller (RSS Control), + http://lwn.net/Articles/222762/ +3. Emelianov, Pavel. Resource controllers based on process cgroups + http://lkml.org/lkml/2007/3/6/198 +4. Emelianov, Pavel. RSS controller based on process cgroups (v2) + http://lkml.org/lkml/2007/4/9/78 +5. Emelianov, Pavel. 
RSS controller based on process cgroups (v3) + http://lkml.org/lkml/2007/5/30/244 +6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ +7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control + subsystem (v3), http://lwn.net/Articles/235534/ +8. Singh, Balbir. RSS controller v2 test results (lmbench), + http://lkml.org/lkml/2007/5/17/232 +9. Singh, Balbir. RSS controller v2 AIM9 results + http://lkml.org/lkml/2007/5/18/1 +10. Singh, Balbir. Memory controller v6 test results, + http://lkml.org/lkml/2007/8/19/36 +11. Singh, Balbir. Memory controller introduction (v6), + http://lkml.org/lkml/2007/8/17/69 +12. Corbet, Jonathan, Controlling memory use in cgroups, + http://lwn.net/Articles/243795/ diff --git a/Documentation/cgroups/resource_counter.txt b/Documentation/cgroups/resource_counter.txt new file mode 100644 index 00000000000..f196ac1d7d2 --- /dev/null +++ b/Documentation/cgroups/resource_counter.txt @@ -0,0 +1,181 @@ + + The Resource Counter + +The resource counter, declared at include/linux/res_counter.h, +is supposed to facilitate the resource management by controllers +by providing common stuff for accounting. + +This "stuff" includes the res_counter structure and routines +to work with it. + + + +1. Crucial parts of the res_counter structure + + a. unsigned long long usage + + The usage value shows the amount of a resource that is consumed + by a group at a given time. The units of measurement should be + determined by the controller that uses this counter. E.g. it can + be bytes, items or any other unit the controller operates on. + + b. unsigned long long max_usage + + The maximal value of the usage over time. + + This value is useful when gathering statistical information about + the particular group, as it shows the actual resource requirements + for a particular group, not just some usage snapshot. + + c. unsigned long long limit + + The maximal allowed amount of resource to consume by the group. 
In + case the group requests for more resources, so that the usage value + would exceed the limit, the resource allocation is rejected (see + the next section). + + d. unsigned long long failcnt + + The failcnt stands for "failures counter". This is the number of + resource allocation attempts that failed. + + e. spinlock_t lock + + Protects changes of the above values. + + + +2. Basic accounting routines + + a. void res_counter_init(struct res_counter *rc) + + Initializes the resource counter. As usual, should be the first + routine called for a new counter. + + b. int res_counter_charge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is about to be allocated it has to be accounted + with the appropriate resource counter (controller should determine + which one to use on its own). This operation is called "charging". + + It is not very important which operation - resource allocation + or charging - is performed first, but + * if the allocation is performed first, this may create a + temporary resource over-usage by the time resource counter is + charged; + * if the charging is performed first, then it should be uncharged + on error path (if the one is called). + + c. void res_counter_uncharge[_locked] + (struct res_counter *rc, unsigned long val) + + When a resource is released (freed) it should be de-accounted + from the resource counter it was accounted to. This is called + "uncharging". + + The _locked routines imply that the res_counter->lock is taken. + + + 2.1 Other accounting routines + + There are more routines that may help you with common needs, like + checking whether the limit is reached or resetting the max_usage + value. They are all declared in include/linux/res_counter.h. + + + +3. Analyzing the resource counter registrations + + a. If the failcnt value constantly grows, this means that the counter's + limit is too tight.
Either the group is misbehaving and consumes too + many resources, or the configuration is not suitable for the group + and the limit should be increased. + + b. The max_usage value can be used to quickly tune the group. One may + set the limits to maximal values and either load the container with + a common pattern or leave one for a while. After this the max_usage + value shows the amount of memory the container would require during + its common activity. + + Setting the limit a bit above this value gives a pretty good + configuration that works in most of the cases. + + c. If the max_usage is much less than the limit, but the failcnt value + is growing, then the group tries to allocate a big chunk of resource + at once. + + d. If the max_usage is much less than the limit, but the failcnt value + is 0, then this group is given too high limit, that it does not + require. It is better to lower the limit a bit leaving more resource + for other groups. + + + +4. Communication with the control groups subsystem (cgroups) + +All the resource controllers that are using cgroups and resource counters +should provide files (in the cgroup filesystem) to work with the resource +counter fields. They are recommended to adhere to the following rules: + + a. File names + + Field name File name + --------------------------------------------------- + usage usage_in_ + max_usage max_usage_in_ + limit limit_in_ + failcnt failcnt + lock no file :) + + b. Reading from file should show the corresponding field value in the + appropriate format. + + c. Writing to file + + Field Expected behavior + ---------------------------------- + usage prohibited + max_usage reset to usage + limit set the limit + failcnt reset to zero + + + +5. Usage example + + a. Declare a task group (take a look at cgroups subsystem for this) and + fold a res_counter into it + + struct my_group { + struct res_counter res; + + + } + + b. Put hooks in resource allocation/release paths + + int alloc_something(...) 
+ { + if (res_counter_charge(res_counter_ptr, amount) < 0) + return -ENOMEM; + + + } + + void release_something(...) + { + res_counter_uncharge(res_counter_ptr, amount); + + + } + + In order to keep the usage value self-consistent, both the + "res_counter_ptr" and the "amount" in release_something() should be + the same as they were in the alloc_something() when the releasing + resource was allocated. + + c. Provide the way to read res_counter values and set them (the cgroups + still can help with it). + + d. Compile and run :) diff --git a/Documentation/controllers/cpuacct.txt b/Documentation/controllers/cpuacct.txt deleted file mode 100644 index bb775fbe43d..00000000000 --- a/Documentation/controllers/cpuacct.txt +++ /dev/null @@ -1,32 +0,0 @@ -CPU Accounting Controller -------------------------- - -The CPU accounting controller is used to group tasks using cgroups and -account the CPU usage of these groups of tasks. - -The CPU accounting controller supports multi-hierarchy groups. An accounting -group accumulates the CPU usage of all of its child groups and the tasks -directly present in its group. - -Accounting groups can be created by first mounting the cgroup filesystem. - -# mkdir /cgroups -# mount -t cgroup -ocpuacct none /cgroups - -With the above step, the initial or the parent accounting group -becomes visible at /cgroups. At bootup, this group includes all the -tasks in the system. /cgroups/tasks lists the tasks in this cgroup. -/cgroups/cpuacct.usage gives the CPU time (in nanoseconds) obtained by -this group which is essentially the CPU time obtained by all the tasks -in the system. - -New accounting groups can be created under the parent group /cgroups. - -# cd /cgroups -# mkdir g1 -# echo $$ > g1 - -The above steps create a new group g1 and move the current shell -process (bash) into it. CPU time consumed by this bash and its children -can be obtained from g1/cpuacct.usage and the same is accumulated in -/cgroups/cpuacct.usage also.
diff --git a/Documentation/controllers/devices.txt b/Documentation/controllers/devices.txt deleted file mode 100644 index 7cc6e6a6067..00000000000 --- a/Documentation/controllers/devices.txt +++ /dev/null @@ -1,52 +0,0 @@ -Device Whitelist Controller - -1. Description: - -Implement a cgroup to track and enforce open and mknod restrictions -on device files. A device cgroup associates a device access -whitelist with each cgroup. A whitelist entry has 4 fields. -'type' is a (all), c (char), or b (block). 'all' means it applies -to all types and all major and minor numbers. Major and minor are -either an integer or * for all. Access is a composition of r -(read), w (write), and m (mknod). - -The root device cgroup starts with rwm to 'all'. A child device -cgroup gets a copy of the parent. Administrators can then remove -devices from the whitelist or add new entries. A child cgroup can -never receive a device access which is denied by its parent. However -when a device access is removed from a parent it will not also be -removed from the child(ren). - -2. User Interface - -An entry is added using devices.allow, and removed using -devices.deny. For instance - - echo 'c 1:3 mr' > /cgroups/1/devices.allow - -allows cgroup 1 to read and mknod the device usually known as -/dev/null. Doing - - echo a > /cgroups/1/devices.deny - -will remove the default 'a *:* rwm' entry. Doing - - echo a > /cgroups/1/devices.allow - -will add the 'a *:* rwm' entry to the whitelist. - -3. Security - -Any task can move itself between cgroups. This clearly won't -suffice, but we can decide the best way to adequately restrict -movement as people get some experience with this. We may just want -to require CAP_SYS_ADMIN, which at least is a separate bit from -CAP_MKNOD. We may want to just refuse moving to a cgroup which -isn't a descendent of the current one. Or we may want to use -CAP_MAC_ADMIN, since we really are trying to lock down root. 
- -CAP_SYS_ADMIN is needed to modify the whitelist or move another -task to a new cgroup. (Again we'll probably want to change that). - -A cgroup may not be granted more permissions than the cgroup's -parent has. diff --git a/Documentation/controllers/memcg_test.txt b/Documentation/controllers/memcg_test.txt deleted file mode 100644 index 08d4d3ea0d7..00000000000 --- a/Documentation/controllers/memcg_test.txt +++ /dev/null @@ -1,342 +0,0 @@ -Memory Resource Controller(Memcg) Implementation Memo. -Last Updated: 2008/12/15 -Base Kernel Version: based on 2.6.28-rc8-mm. - -Because VM is getting complex (one of reasons is memcg...), memcg's behavior -is complex. This is a document for memcg's internal behavior. -Please note that implementation details can be changed. - -(*) Topics on API should be in Documentation/controllers/memory.txt) - -0. How to record usage ? - 2 objects are used. - - page_cgroup ....an object per page. - Allocated at boot or memory hotplug. Freed at memory hot removal. - - swap_cgroup ... an entry per swp_entry. - Allocated at swapon(). Freed at swapoff(). - - The page_cgroup has USED bit and double count against a page_cgroup never - occurs. swap_cgroup is used only when a charged page is swapped-out. - -1. Charge - - a page/swp_entry may be charged (usage += PAGE_SIZE) at - - mem_cgroup_newpage_charge() - Called at new page fault and Copy-On-Write. - - mem_cgroup_try_charge_swapin() - Called at do_swap_page() (page fault on swap entry) and swapoff. - Followed by charge-commit-cancel protocol. (With swap accounting) - At commit, a charge recorded in swap_cgroup is removed. - - mem_cgroup_cache_charge() - Called at add_to_page_cache() - - mem_cgroup_cache_charge_swapin() - Called at shmem's swapin. - - mem_cgroup_prepare_migration() - Called before migration. "extra" charge is done and followed by - charge-commit-cancel protocol. - At commit, charge against oldpage or newpage will be committed. - -2. 
Uncharge - a page/swp_entry may be uncharged (usage -= PAGE_SIZE) by - - mem_cgroup_uncharge_page() - Called when an anonymous page is fully unmapped. I.e., mapcount goes - to 0. If the page is SwapCache, uncharge is delayed until - mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_cache_page() - Called when a page-cache is deleted from radix-tree. If the page is - SwapCache, uncharge is delayed until mem_cgroup_uncharge_swapcache(). - - mem_cgroup_uncharge_swapcache() - Called when SwapCache is removed from radix-tree. The charge itself - is moved to swap_cgroup. (If mem+swap controller is disabled, no - charge to swap occurs.) - - mem_cgroup_uncharge_swap() - Called when swp_entry's refcnt goes down to 0. A charge against swap - disappears. - - mem_cgroup_end_migration(old, new) - At success of migration old is uncharged (if necessary), a charge - to new page is committed. At failure, charge to old page is committed. - -3. charge-commit-cancel - In some case, we can't know this "charge" is valid or not at charging - (because of races). - To handle such case, there are charge-commit-cancel functions. - mem_cgroup_try_charge_XXX - mem_cgroup_commit_charge_XXX - mem_cgroup_cancel_charge_XXX - these are used in swap-in and migration. - - At try_charge(), there are no flags to say "this page is charged". - at this point, usage += PAGE_SIZE. - - At commit(), the function checks the page should be charged or not - and set flags or avoid charging.(usage -= PAGE_SIZE) - - At cancel(), simply usage -= PAGE_SIZE. - -Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y. - -4. Anonymous - Anonymous page is newly allocated at - - page fault into MAP_ANONYMOUS mapping. - - Copy-On-Write. - It is charged right after it's allocated before doing any page table - related operations. Of course, it's uncharged when another page is used - for the fault address. 
- - At freeing anonymous page (by exit() or munmap()), zap_pte() is called - and pages for ptes are freed one by one.(see mm/memory.c). Uncharges - are done at page_remove_rmap() when page_mapcount() goes down to 0. - - Another page freeing is by page-reclaim (vmscan.c) and anonymous - pages are swapped out. In this case, the page is marked as - PageSwapCache(). uncharge() routine doesn't uncharge the page marked - as SwapCache(). It's delayed until __delete_from_swap_cache(). - - 4.1 Swap-in. - At swap-in, the page is taken from swap-cache. There are 2 cases. - - (a) If the SwapCache is newly allocated and read, it has no charges. - (b) If the SwapCache has been mapped by processes, it has been - charged already. - - This swap-in is one of the most complicated work. In do_swap_page(), - following events occur when pte is unchanged. - - (1) the page (SwapCache) is looked up. - (2) lock_page() - (3) try_charge_swapin() - (4) reuse_swap_page() (may call delete_swap_cache()) - (5) commit_charge_swapin() - (6) swap_free(). - - Considering following situation for example. - - (A) The page has not been charged before (2) and reuse_swap_page() - doesn't call delete_from_swap_cache(). - (B) The page has not been charged before (2) and reuse_swap_page() - calls delete_from_swap_cache(). - (C) The page has been charged before (2) and reuse_swap_page() doesn't - call delete_from_swap_cache(). - (D) The page has been charged before (2) and reuse_swap_page() calls - delete_from_swap_cache(). - - memory.usage/memsw.usage changes to this page/swp_entry will be - Case (A) (B) (C) (D) - Event - Before (2) 0/ 1 0/ 1 1/ 1 1/ 1 - =========================================== - (3) +1/+1 +1/+1 +1/+1 +1/+1 - (4) - 0/ 0 - -1/ 0 - (5) 0/-1 0/ 0 -1/-1 0/ 0 - (6) - 0/-1 - 0/-1 - =========================================== - Result 1/ 1 1/ 1 1/ 1 1/ 1 - - In any cases, charges to this page should be 1/ 1. - - 4.2 Swap-out. - At swap-out, typical state transition is below. 
- - (a) add to swap cache. (marked as SwapCache) - swp_entry's refcnt += 1. - (b) fully unmapped. - swp_entry's refcnt += # of ptes. - (c) write back to swap. - (d) delete from swap cache. (remove from SwapCache) - swp_entry's refcnt -= 1. - - - At (b), the page is marked as SwapCache and not uncharged. - At (d), the page is removed from SwapCache and a charge in page_cgroup - is moved to swap_cgroup. - - Finally, at task exit, - (e) zap_pte() is called and swp_entry's refcnt -=1 -> 0. - Here, a charge in swap_cgroup disappears. - -5. Page Cache - Page Cache is charged at - - add_to_page_cache_locked(). - - uncharged at - - __remove_from_page_cache(). - - The logic is very clear. (About migration, see below) - Note: __remove_from_page_cache() is called by remove_from_page_cache() - and __remove_mapping(). - -6. Shmem(tmpfs) Page Cache - Memcg's charge/uncharge have special handlers of shmem. The best way - to understand shmem's page state transition is to read mm/shmem.c. - But brief explanation of the behavior of memcg around shmem will be - helpful to understand the logic. - - Shmem's page (just leaf page, not direct/indirect block) can be on - - radix-tree of shmem's inode. - - SwapCache. - - Both on radix-tree and SwapCache. This happens at swap-in - and swap-out, - - It's charged when... - - A new page is added to shmem's radix-tree. - - A swp page is read. (move a charge from swap_cgroup to page_cgroup) - It's uncharged when - - A page is removed from radix-tree and not SwapCache. - - When SwapCache is removed, a charge is moved to swap_cgroup. - - When swp_entry's refcnt goes down to 0, a charge in swap_cgroup - disappears. - -7. Page Migration - One of the most complicated functions is page-migration-handler. - Memcg has 2 routines. Assume that we are migrating a page's contents - from OLDPAGE to NEWPAGE. - - Usual migration logic is.. - (a) remove the page from LRU. - (b) allocate NEWPAGE (migration target) - (c) lock by lock_page(). 
- (d) unmap all mappings. - (e-1) If necessary, replace entry in radix-tree. - (e-2) move contents of a page. - (f) map all mappings again. - (g) pushback the page to LRU. - (-) OLDPAGE will be freed. - - Before (g), memcg should complete all necessary charge/uncharge to - NEWPAGE/OLDPAGE. - - The point is.... - - If OLDPAGE is anonymous, all charges will be dropped at (d) because - try_to_unmap() drops all mapcount and the page will not be - SwapCache. - - - If OLDPAGE is SwapCache, charges will be kept at (g) because - __delete_from_swap_cache() isn't called at (e-1) - - - If OLDPAGE is page-cache, charges will be kept at (g) because - remove_from_swap_cache() isn't called at (e-1) - - memcg provides following hooks. - - - mem_cgroup_prepare_migration(OLDPAGE) - Called after (b) to account a charge (usage += PAGE_SIZE) against - memcg which OLDPAGE belongs to. - - - mem_cgroup_end_migration(OLDPAGE, NEWPAGE) - Called after (f) before (g). - If OLDPAGE is used, commit OLDPAGE again. If OLDPAGE is already - charged, a charge by prepare_migration() is automatically canceled. - If NEWPAGE is used, commit NEWPAGE and uncharge OLDPAGE. - - But zap_pte() (by exit or munmap) can be called while migration, - we have to check if OLDPAGE/NEWPAGE is a valid page after commit(). - -8. LRU - Each memcg has its own private LRU. Now, it's handling is under global - VM's control (means that it's handled under global zone->lru_lock). - Almost all routines around memcg's LRU is called by global LRU's - list management functions under zone->lru_lock(). - - A special function is mem_cgroup_isolate_pages(). This scans - memcg's private LRU and call __isolate_lru_page() to extract a page - from LRU. - (By __isolate_lru_page(), the page is removed from both of global and - private LRU.) - - -9. Typical Tests. - - Tests for racy cases. - - 9.1 Small limit to memcg. - When you do test to do racy case, it's good test to set memcg's limit - to be very small rather than GB. 
Many races found in the test under - xKB or xxMB limits. - (Memory behavior under GB and Memory behavior under MB shows very - different situation.) - - 9.2 Shmem - Historically, memcg's shmem handling was poor and we saw some amount - of troubles here. This is because shmem is page-cache but can be - SwapCache. Test with shmem/tmpfs is always good test. - - 9.3 Migration - For NUMA, migration is an another special case. To do easy test, cpuset - is useful. Following is a sample script to do migration. - - mount -t cgroup -o cpuset none /opt/cpuset - - mkdir /opt/cpuset/01 - echo 1 > /opt/cpuset/01/cpuset.cpus - echo 0 > /opt/cpuset/01/cpuset.mems - echo 1 > /opt/cpuset/01/cpuset.memory_migrate - mkdir /opt/cpuset/02 - echo 1 > /opt/cpuset/02/cpuset.cpus - echo 1 > /opt/cpuset/02/cpuset.mems - echo 1 > /opt/cpuset/02/cpuset.memory_migrate - - In above set, when you moves a task from 01 to 02, page migration to - node 0 to node 1 will occur. Following is a script to migrate all - under cpuset. - -- - move_task() - { - for pid in $1 - do - /bin/echo $pid >$2/tasks 2>/dev/null - echo -n $pid - echo -n " " - done - echo END - } - - G1_TASK=`cat ${G1}/tasks` - G2_TASK=`cat ${G2}/tasks` - move_task "${G1_TASK}" ${G2} & - -- - 9.4 Memory hotplug. - memory hotplug test is one of good test. - to offline memory, do following. - # echo offline > /sys/devices/system/memory/memoryXXX/state - (XXX is the place of memory) - This is an easy way to test page migration, too. - - 9.5 mkdir/rmdir - When using hierarchy, mkdir/rmdir test should be done. - Use tests like the following. - - echo 1 >/opt/cgroup/01/memory/use_hierarchy - mkdir /opt/cgroup/01/child_a - mkdir /opt/cgroup/01/child_b - - set limit to 01. - add limit to 01/child_b - run jobs under child_a and child_b - - create/delete following groups at random while jobs are running. - /opt/cgroup/01/child_a/child_aa - /opt/cgroup/01/child_b/child_bb - /opt/cgroup/01/child_c - - running new jobs in new group is also good. 
- - 9.6 Mount with other subsystems. - Mounting with other subsystems is a good test because there is a - race and lock dependency with other cgroup subsystems. - - example) - # mount -t cgroup none /cgroup -t cpuset,memory,cpu,devices - - and do task move, mkdir, rmdir etc...under this. diff --git a/Documentation/controllers/memory.txt b/Documentation/controllers/memory.txt deleted file mode 100644 index e1501964df1..00000000000 --- a/Documentation/controllers/memory.txt +++ /dev/null @@ -1,399 +0,0 @@ -Memory Resource Controller - -NOTE: The Memory Resource Controller has been generically been referred -to as the memory controller in this document. Do not confuse memory controller -used here with the memory controller that is used in hardware. - -Salient features - -a. Enable control of both RSS (mapped) and Page Cache (unmapped) pages -b. The infrastructure allows easy addition of other types of memory to control -c. Provides *zero overhead* for non memory controller users -d. Provides a double LRU: global memory pressure causes reclaim from the - global LRU; a cgroup on hitting a limit, reclaims from the per - cgroup LRU - -NOTE: Swap Cache (unmapped) is not accounted now. - -Benefits and Purpose of the memory controller - -The memory controller isolates the memory behaviour of a group of tasks -from the rest of the system. The article on LWN [12] mentions some probable -uses of the memory controller. The memory controller can be used to - -a. Isolate an application or a group of applications - Memory hungry applications can be isolated and limited to a smaller - amount of memory. -b. Create a cgroup with limited amount of memory, this can be used - as a good alternative to booting with mem=XXXX. -c. Virtualization solutions can control the amount of memory they want - to assign to a virtual machine instance. -d. 
A CD/DVD burner could control the amount of memory used by the - rest of the system to ensure that burning does not fail due to lack - of available memory. -e. There are several other use cases, find one or use the controller just - for fun (to learn and hack on the VM subsystem). - -1. History - -The memory controller has a long history. A request for comments for the memory -controller was posted by Balbir Singh [1]. At the time the RFC was posted -there were several implementations for memory control. The goal of the -RFC was to build consensus and agreement for the minimal features required -for memory control. The first RSS controller was posted by Balbir Singh[2] -in Feb 2007. Pavel Emelianov [3][4][5] has since posted three versions of the -RSS controller. At OLS, at the resource management BoF, everyone suggested -that we handle both page cache and RSS together. Another request was raised -to allow user space handling of OOM. The current memory controller is -at version 6; it combines both mapped (RSS) and unmapped Page -Cache Control [11]. - -2. Memory Control - -Memory is a unique resource in the sense that it is present in a limited -amount. If a task requires a lot of CPU processing, the task can spread -its processing over a period of hours, days, months or years, but with -memory, the same physical memory needs to be reused to accomplish the task. - -The memory controller implementation has been divided into phases. These -are: - -1. Memory controller -2. mlock(2) controller -3. Kernel user memory accounting and slab control -4. user mappings length controller - -The memory controller is the first controller developed. - -2.1. Design - -The core of the design is a counter called the res_counter. The res_counter -tracks the current memory usage and limit of the group of processes associated -with the controller. Each cgroup has a memory controller specific data -structure (mem_cgroup) associated with it. - -2.2. 
Accounting - - +--------------------+ - | mem_cgroup | - | (res_counter) | - +--------------------+ - / ^ \ - / | \ - +---------------+ | +---------------+ - | mm_struct | |.... | mm_struct | - | | | | | - +---------------+ | +---------------+ - | - + --------------+ - | - +---------------+ +------+--------+ - | page +----------> page_cgroup| - | | | | - +---------------+ +---------------+ - - (Figure 1: Hierarchy of Accounting) - - -Figure 1 shows the important aspects of the controller - -1. Accounting happens per cgroup -2. Each mm_struct knows about which cgroup it belongs to -3. Each page has a pointer to the page_cgroup, which in turn knows the - cgroup it belongs to - -The accounting is done as follows: mem_cgroup_charge() is invoked to setup -the necessary data structures and check if the cgroup that is being charged -is over its limit. If it is then reclaim is invoked on the cgroup. -More details can be found in the reclaim section of this document. -If everything goes well, a page meta-data-structure called page_cgroup is -allocated and associated with the page. This routine also adds the page to -the per cgroup LRU. - -2.2.1 Accounting details - -All mapped anon pages (RSS) and cache pages (Page Cache) are accounted. -(some pages which never be reclaimable and will not be on global LRU - are not accounted. we just accounts pages under usual vm management.) - -RSS pages are accounted at page_fault unless they've already been accounted -for earlier. A file page will be accounted for as Page Cache when it's -inserted into inode (radix-tree). While it's mapped into the page tables of -processes, duplicate accounting is carefully avoided. - -A RSS page is unaccounted when it's fully unmapped. A PageCache page is -unaccounted when it's removed from radix-tree. - -At page migration, accounting information is kept. - -Note: we just account pages-on-lru because our purpose is to control amount -of used pages. 
not-on-lru pages tend to be out-of-control from vm view. - -2.3 Shared Page Accounting - -Shared pages are accounted on the basis of the first touch approach. The -cgroup that first touches a page is accounted for the page. The principle -behind this approach is that a cgroup that aggressively uses a shared -page will eventually get charged for it (once it is uncharged from -the cgroup that brought it in -- this will happen on memory pressure). - -Exception: If CONFIG_CGROUP_MEM_RES_CTLR_SWAP is not used. -When you do swapoff and make swapped-out pages of shmem(tmpfs) to -be backed into memory in force, charges for pages are accounted against the -caller of swapoff rather than the users of shmem. - - -2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) -Swap Extension allows you to record charge for swap. A swapped-in page is -charged back to original page allocator if possible. - -When swap is accounted, following files are added. - - memory.memsw.usage_in_bytes. - - memory.memsw.limit_in_bytes. - -usage of mem+swap is limited by memsw.limit_in_bytes. - -Note: why 'mem+swap' rather than swap. -The global LRU(kswapd) can swap out arbitrary pages. Swap-out means -to move account from memory to swap...there is no change in usage of -mem+swap. - -In other words, when we want to limit the usage of swap without affecting -global LRU, mem+swap limit is better than just limiting swap from OS point -of view. - -2.5 Reclaim - -Each cgroup maintains a per cgroup LRU that consists of an active -and inactive list. When a cgroup goes over its limit, we first try -to reclaim memory from the cgroup so as to make space for the new -pages that the cgroup has touched. If the reclaim is unsuccessful, -an OOM routine is invoked to select and kill the bulkiest task in the -cgroup. - -The reclaim algorithm has not been modified for cgroups, except that -pages that are selected for reclaiming come from the per cgroup LRU -list. - -2.6
Locking - -The memory controller uses the following hierarchy - -1. zone->lru_lock is used for selecting pages to be isolated -2. mem->per_zone->lru_lock protects the per cgroup LRU (per zone) -3. lock_page_cgroup() is used to protect page->page_cgroup - -3. User Interface - -0. Configuration - -a. Enable CONFIG_CGROUPS -b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_RES_CTLR - -1. Prepare the cgroups -# mkdir -p /cgroups -# mount -t cgroup none /cgroups -o memory - -2. Make the new group and move bash into it -# mkdir /cgroups/0 -# echo $$ > /cgroups/0/tasks - -Since now we're in the 0 cgroup, -We can alter the memory limit: -# echo 4M > /cgroups/0/memory.limit_in_bytes - -NOTE: We can use a suffix (k, K, m, M, g or G) to indicate values in kilo, -mega or gigabytes. - -# cat /cgroups/0/memory.limit_in_bytes -4194304 - -NOTE: The interface has now changed to display the usage in bytes -instead of pages - -We can check the usage: -# cat /cgroups/0/memory.usage_in_bytes -1216512 - -A successful write to this file does not guarantee a successful set of -this limit to the value written into the file. This can be due to a -number of factors, such as rounding up to page boundaries or the total -availability of memory on the system. The user is required to re-read -this file after a write to guarantee the value committed by the kernel. - -# echo 1 > memory.limit_in_bytes -# cat memory.limit_in_bytes -4096 - -The memory.failcnt field gives the number of times that the cgroup limit was -exceeded. - -The memory.stat file gives accounting information. Now, the number of -caches, RSS and Active pages/Inactive pages are shown. - -4. Testing - -Balbir posted lmbench, AIM9, LTP and vmmstress results [10] and [11]. -Apart from that v6 has been tested with several applications and regular -daily use. The controller has also been tested on the PPC64, x86_64 and -UML platforms. 
- -4.1 Troubleshooting - -Sometimes a user might find that the application under a cgroup is -terminated. There are several causes for this: - -1. The cgroup limit is too low (just too low to do anything useful) -2. The user is using anonymous memory and swap is turned off or too low - -A sync followed by echo 1 > /proc/sys/vm/drop_caches will help get rid of -some of the pages cached in the cgroup (page cache pages). - -4.2 Task migration - -When a task migrates from one cgroup to another, its charge is not -carried forward. The pages allocated from the original cgroup still -remain charged to it, the charge is dropped when the page is freed or -reclaimed. - -4.3 Removing a cgroup - -A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a -cgroup might have some charge associated with it, even though all -tasks have migrated away from it. -Such charges are freed(at default) or moved to its parent. When moved, -both of RSS and CACHES are moved to parent. -If both of them are busy, rmdir() returns -EBUSY. See 5.1 also. - -Charges recorded in swap information are not updated at removal of cgroup. -Recorded information is discarded and a cgroup which uses swap (swapcache) -will be charged as a new owner of it. - - -5. Misc. interfaces. - -5.1 force_empty - memory.force_empty interface is provided to make cgroup's memory usage empty. - You can use this interface only when the cgroup has no tasks. - When writing anything to this - - # echo 0 > memory.force_empty - - Almost all pages tracked by this memcg will be unmapped and freed. Some - pages cannot be freed because they are locked or in use. Such pages are moved - to parent and this cgroup will be empty. But this may return -EBUSY in - some too busy case. - - Typical use case of this interface is that calling this before rmdir(). - Because rmdir() moves all pages to parent, some out-of-use page caches can be - moved to the parent. If you want to avoid that, force_empty will be useful.
- -5.2 stat file - memory.stat file includes following statistics (now) - cache - # of pages from page-cache and shmem. - rss - # of pages from anonymous memory. - pgpgin - # of event of charging - pgpgout - # of event of uncharging - active_anon - # of pages on active lru of anon, shmem. - inactive_anon - # of pages on inactive lru of anon, shmem - active_file - # of pages on active lru of file-cache - inactive_file - # of pages on inactive lru of file cache - unevictable - # of pages cannot be reclaimed.(mlocked etc) - - The following depend on CONFIG_DEBUG_VM. - inactive_ratio - VM internal parameter. (see mm/page_alloc.c) - recent_rotated_anon - VM internal parameter. (see mm/vmscan.c) - recent_rotated_file - VM internal parameter. (see mm/vmscan.c) - recent_scanned_anon - VM internal parameter. (see mm/vmscan.c) - recent_scanned_file - VM internal parameter. (see mm/vmscan.c) - - Memo: - recent_rotated means recent frequency of lru rotation. - recent_scanned means recent # of scans to lru. - showing for better debug please see the code for meanings. - - -5.3 swappiness - Similar to /proc/sys/vm/swappiness, but affecting a hierarchy of groups only. - - Following cgroup's swappiness can't be changed. - - root cgroup (uses /proc/sys/vm/swappiness). - - a cgroup which uses hierarchy and it has child cgroup. - - a cgroup which uses hierarchy and not the root of hierarchy. - - -6. Hierarchy support - -The memory controller supports a deep hierarchy and hierarchical accounting. -The hierarchy is created by creating the appropriate cgroups in the -cgroup filesystem. Consider for example, the following cgroup filesystem -hierarchy - - root - / | \ - / | \ - a b c - | \ - | \ - d e - -In the diagram above, with hierarchical accounting enabled, all memory -usage of e, is accounted to its ancestors up until the root (i.e, c and root), -that has memory.use_hierarchy enabled.
If one of the ancestors goes over its -limit, the reclaim algorithm reclaims from the tasks in the ancestor and the -children of the ancestor. - -6.1 Enabling hierarchical accounting and reclaim - -The memory controller by default disables the hierarchy feature. Support -can be enabled by writing 1 to memory.use_hierarchy file of the root cgroup - -# echo 1 > memory.use_hierarchy - -The feature can be disabled by - -# echo 0 > memory.use_hierarchy - -NOTE1: Enabling/disabling will fail if the cgroup already has other -cgroups created below it. - -NOTE2: This feature can be enabled/disabled per subtree. - -7. TODO - -1. Add support for accounting huge pages (as a separate controller) -2. Make per-cgroup scanner reclaim not-shared pages first -3. Teach controller to account for shared-pages -4. Start reclamation in the background when the limit is - not yet hit but the usage is getting closer - -Summary - -Overall, the memory controller has been a stable controller and has been -commented and discussed quite extensively in the community. - -References - -1. Singh, Balbir. RFC: Memory Controller, http://lwn.net/Articles/206697/ -2. Singh, Balbir. Memory Controller (RSS Control), - http://lwn.net/Articles/222762/ -3. Emelianov, Pavel. Resource controllers based on process cgroups - http://lkml.org/lkml/2007/3/6/198 -4. Emelianov, Pavel. RSS controller based on process cgroups (v2) - http://lkml.org/lkml/2007/4/9/78 -5. Emelianov, Pavel. RSS controller based on process cgroups (v3) - http://lkml.org/lkml/2007/5/30/244 -6. Menage, Paul. Control Groups v10, http://lwn.net/Articles/236032/ -7. Vaidyanathan, Srinivasan, Control Groups: Pagecache accounting and control - subsystem (v3), http://lwn.net/Articles/235534/ -8. Singh, Balbir. RSS controller v2 test results (lmbench), - http://lkml.org/lkml/2007/5/17/232 -9. Singh, Balbir. RSS controller v2 AIM9 results - http://lkml.org/lkml/2007/5/18/1 -10. Singh, Balbir. 
Memory controller v6 test results, - http://lkml.org/lkml/2007/8/19/36 -11. Singh, Balbir. Memory controller introduction (v6), - http://lkml.org/lkml/2007/8/17/69 -12. Corbet, Jonathan, Controlling memory use in cgroups, - http://lwn.net/Articles/243795/ diff --git a/Documentation/controllers/resource_counter.txt b/Documentation/controllers/resource_counter.txt deleted file mode 100644 index f196ac1d7d2..00000000000 --- a/Documentation/controllers/resource_counter.txt +++ /dev/null @@ -1,181 +0,0 @@ - - The Resource Counter - -The resource counter, declared at include/linux/res_counter.h, -is supposed to facilitate the resource management by controllers -by providing common stuff for accounting. - -This "stuff" includes the res_counter structure and routines -to work with it. - - - -1. Crucial parts of the res_counter structure - - a. unsigned long long usage - - The usage value shows the amount of a resource that is consumed - by a group at a given time. The units of measurement should be - determined by the controller that uses this counter. E.g. it can - be bytes, items or any other unit the controller operates on. - - b. unsigned long long max_usage - - The maximal value of the usage over time. - - This value is useful when gathering statistical information about - the particular group, as it shows the actual resource requirements - for a particular group, not just some usage snapshot. - - c. unsigned long long limit - - The maximal allowed amount of resource to consume by the group. In - case the group requests for more resources, so that the usage value - would exceed the limit, the resource allocation is rejected (see - the next section). - - d. unsigned long long failcnt - - The failcnt stands for "failures counter". This is the number of - resource allocation attempts that failed. - - e. spinlock_t lock - - Protects changes of the above values. - - - -2. Basic accounting routines - - a.
void res_counter_init(struct res_counter *rc) - - Initializes the resource counter. As usual, should be the first - routine called for a new counter. - - b. int res_counter_charge[_locked] - (struct res_counter *rc, unsigned long val) - - When a resource is about to be allocated it has to be accounted - with the appropriate resource counter (controller should determine - which one to use on its own). This operation is called "charging". - - This is not very important which operation - resource allocation - or charging - is performed first, but - * if the allocation is performed first, this may create a - temporary resource over-usage by the time resource counter is - charged; - * if the charging is performed first, then it should be uncharged - on error path (if the one is called). - - c. void res_counter_uncharge[_locked] - (struct res_counter *rc, unsigned long val) - - When a resource is released (freed) it should be de-accounted - from the resource counter it was accounted to. This is called - "uncharging". - - The _locked routines imply that the res_counter->lock is taken. - - - 2.1 Other accounting routines - - There are more routines that may help you with common needs, like - checking whether the limit is reached or resetting the max_usage - value. They are all declared in include/linux/res_counter.h. - - - -3. Analyzing the resource counter registrations - - a. If the failcnt value constantly grows, this means that the counter's - limit is too tight. Either the group is misbehaving and consumes too - many resources, or the configuration is not suitable for the group - and the limit should be increased. - - b. The max_usage value can be used to quickly tune the group. One may - set the limits to maximal values and either load the container with - a common pattern or leave one for a while. After this the max_usage - value shows the amount of memory the container would require during - its common activity. 
- - Setting the limit a bit above this value gives a pretty good - configuration that works in most of the cases. - - c. If the max_usage is much less than the limit, but the failcnt value - is growing, then the group tries to allocate a big chunk of resource - at once. - - d. If the max_usage is much less than the limit, but the failcnt value - is 0, then this group is given too high limit, that it does not - require. It is better to lower the limit a bit leaving more resource - for other groups. - - - -4. Communication with the control groups subsystem (cgroups) - -All the resource controllers that are using cgroups and resource counters -should provide files (in the cgroup filesystem) to work with the resource -counter fields. They are recommended to adhere to the following rules: - - a. File names - - Field name File name - --------------------------------------------------- - usage usage_in_ - max_usage max_usage_in_ - limit limit_in_ - failcnt failcnt - lock no file :) - - b. Reading from file should show the corresponding field value in the - appropriate format. - - c. Writing to file - - Field Expected behavior - ---------------------------------- - usage prohibited - max_usage reset to usage - limit set the limit - failcnt reset to zero - - - -5. Usage example - - a. Declare a task group (take a look at cgroups subsystem for this) and - fold a res_counter into it - - struct my_group { - struct res_counter res; - - - } - - b. Put hooks in resource allocation/release paths - - int alloc_something(...) - { - if (res_counter_charge(res_counter_ptr, amount) < 0) - return -ENOMEM; - - - } - - void release_something(...) - { - res_counter_uncharge(res_counter_ptr, amount); - - - } - - In order to keep the usage value self-consistent, both the - "res_counter_ptr" and the "amount" in release_something() should be - the same as they were in the alloc_something() when the releasing - resource was allocated. - - c. 
Provide the way to read res_counter values and set them (the cgroups - still can help with it). - - c. Compile and run :) diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt deleted file mode 100644 index 5c86c258c79..00000000000 --- a/Documentation/cpusets.txt +++ /dev/null @@ -1,808 +0,0 @@ - CPUSETS - ------- - -Copyright (C) 2004 BULL SA. -Written by Simon.Derr@bull.net - -Portions Copyright (c) 2004-2006 Silicon Graphics, Inc. -Modified by Paul Jackson -Modified by Christoph Lameter -Modified by Paul Menage -Modified by Hidetoshi Seto - -CONTENTS: -========= - -1. Cpusets - 1.1 What are cpusets ? - 1.2 Why are cpusets needed ? - 1.3 How are cpusets implemented ? - 1.4 What are exclusive cpusets ? - 1.5 What is memory_pressure ? - 1.6 What is memory spread ? - 1.7 What is sched_load_balance ? - 1.8 What is sched_relax_domain_level ? - 1.9 How do I use cpusets ? -2. Usage Examples and Syntax - 2.1 Basic Usage - 2.2 Adding/removing cpus - 2.3 Setting flags - 2.4 Attaching processes -3. Questions -4. Contact - -1. Cpusets -========== - -1.1 What are cpusets ? ----------------------- - -Cpusets provide a mechanism for assigning a set of CPUs and Memory -Nodes to a set of tasks. In this document "Memory Node" refers to -an on-line node that contains memory. - -Cpusets constrain the CPU and Memory placement of tasks to only -the resources within a tasks current cpuset. They form a nested -hierarchy visible in a virtual file system. These are the essential -hooks, beyond what is already present, required to manage dynamic -job placement on large systems. - -Cpusets use the generic cgroup subsystem described in -Documentation/cgroups/cgroups.txt. 
- -Requests by a task, using the sched_setaffinity(2) system call to -include CPUs in its CPU affinity mask, and using the mbind(2) and -set_mempolicy(2) system calls to include Memory Nodes in its memory -policy, are both filtered through that tasks cpuset, filtering out any -CPUs or Memory Nodes not in that cpuset. The scheduler will not -schedule a task on a CPU that is not allowed in its cpus_allowed -vector, and the kernel page allocator will not allocate a page on a -node that is not allowed in the requesting tasks mems_allowed vector. - -User level code may create and destroy cpusets by name in the cgroup -virtual file system, manage the attributes and permissions of these -cpusets and which CPUs and Memory Nodes are assigned to each cpuset, -specify and query to which cpuset a task is assigned, and list the -task pids assigned to a cpuset. - - -1.2 Why are cpusets needed ? ----------------------------- - -The management of large computer systems, with many processors (CPUs), -complex memory cache hierarchies and multiple Memory Nodes having -non-uniform access times (NUMA) presents additional challenges for -the efficient scheduling and memory placement of processes. - -Frequently more modest sized systems can be operated with adequate -efficiency just by letting the operating system automatically share -the available CPU and Memory resources amongst the requesting tasks. - -But larger systems, which benefit more from careful processor and -memory placement to reduce memory access times and contention, -and which typically represent a larger investment for the customer, -can benefit from explicitly placing jobs on properly sized subsets of -the system. - -This can be especially valuable on: - - * Web Servers running multiple instances of the same web application, - * Servers running different applications (for instance, a web server - and a database), or - * NUMA systems running large HPC applications with demanding - performance characteristics. 
- -These subsets, or "soft partitions" must be able to be dynamically -adjusted, as the job mix changes, without impacting other concurrently -executing jobs. The location of the running jobs pages may also be moved -when the memory locations are changed. - -The kernel cpuset patch provides the minimum essential kernel -mechanisms required to efficiently implement such subsets. It -leverages existing CPU and Memory Placement facilities in the Linux -kernel to avoid any additional impact on the critical scheduler or -memory allocator code. - - -1.3 How are cpusets implemented ? ---------------------------------- - -Cpusets provide a Linux kernel mechanism to constrain which CPUs and -Memory Nodes are used by a process or set of processes. - -The Linux kernel already has a pair of mechanisms to specify on which -CPUs a task may be scheduled (sched_setaffinity) and on which Memory -Nodes it may obtain memory (mbind, set_mempolicy). - -Cpusets extends these two mechanisms as follows: - - - Cpusets are sets of allowed CPUs and Memory Nodes, known to the - kernel. - - Each task in the system is attached to a cpuset, via a pointer - in the task structure to a reference counted cgroup structure. - - Calls to sched_setaffinity are filtered to just those CPUs - allowed in that tasks cpuset. - - Calls to mbind and set_mempolicy are filtered to just - those Memory Nodes allowed in that tasks cpuset. - - The root cpuset contains all the systems CPUs and Memory - Nodes. - - For any cpuset, one can define child cpusets containing a subset - of the parents CPU and Memory Node resources. - - The hierarchy of cpusets can be mounted at /dev/cpuset, for - browsing and manipulation from user space. - - A cpuset may be marked exclusive, which ensures that no other - cpuset (except direct ancestors and descendents) may contain - any overlapping CPUs or Memory Nodes. - - You can list all the tasks (by pid) attached to any cpuset. 
- -The implementation of cpusets requires a few, simple hooks -into the rest of the kernel, none in performance critical paths: - - - in init/main.c, to initialize the root cpuset at system boot. - - in fork and exit, to attach and detach a task from its cpuset. - - in sched_setaffinity, to mask the requested CPUs by what's - allowed in that tasks cpuset. - - in sched.c migrate_all_tasks(), to keep migrating tasks within - the CPUs allowed by their cpuset, if possible. - - in the mbind and set_mempolicy system calls, to mask the requested - Memory Nodes by what's allowed in that tasks cpuset. - - in page_alloc.c, to restrict memory to allowed nodes. - - in vmscan.c, to restrict page recovery to the current cpuset. - -You should mount the "cgroup" filesystem type in order to enable -browsing and modifying the cpusets presently known to the kernel. No -new system calls are added for cpusets - all support for querying and -modifying cpusets is via this cpuset file system. - -The /proc/<pid>/status file for each task has four added lines, -displaying the tasks cpus_allowed (on which CPUs it may be scheduled) -and mems_allowed (on which Memory Nodes it may obtain memory), -in the two formats seen in the following example: - - Cpus_allowed: ffffffff,ffffffff,ffffffff,ffffffff - Cpus_allowed_list: 0-127 - Mems_allowed: ffffffff,ffffffff - Mems_allowed_list: 0-63 - -Each cpuset is represented by a directory in the cgroup file system -containing (on top of the standard cgroup files) the following -files describing that cpuset: - - - cpus: list of CPUs in that cpuset - - mems: list of Memory Nodes in that cpuset - - memory_migrate flag: if set, move pages to cpusets nodes - - cpu_exclusive flag: is cpu placement exclusive? - - mem_exclusive flag: is memory placement exclusive?
- - mem_hardwall flag: is memory allocation hardwalled - - memory_pressure: measure of how much paging pressure in cpuset - -In addition, the root cpuset only has the following file: - - memory_pressure_enabled flag: compute memory_pressure? - -New cpusets are created using the mkdir system call or shell -command. The properties of a cpuset, such as its flags, allowed -CPUs and Memory Nodes, and attached tasks, are modified by writing -to the appropriate file in that cpusets directory, as listed above. - -The named hierarchical structure of nested cpusets allows partitioning -a large system into nested, dynamically changeable, "soft-partitions". - -The attachment of each task, automatically inherited at fork by any -children of that task, to a cpuset allows organizing the work load -on a system into related sets of tasks such that each set is constrained -to using the CPUs and Memory Nodes of a particular cpuset. A task -may be re-attached to any other cpuset, if allowed by the permissions -on the necessary cpuset file system directories. - -Such management of a system "in the large" integrates smoothly with -the detailed placement done on individual tasks and memory regions -using the sched_setaffinity, mbind and set_mempolicy system calls. - -The following rules apply to each cpuset: - - - Its CPUs and Memory Nodes must be a subset of its parents. - - It can't be marked exclusive unless its parent is. - - If its cpu or memory is exclusive, they may not overlap any sibling. - -These rules, and the natural hierarchy of cpusets, enable efficient -enforcement of the exclusive guarantee, without having to scan all -cpusets every time any of them change to ensure nothing overlaps a -exclusive cpuset. Also, the use of a Linux virtual file system (vfs) -to represent the cpuset hierarchy provides for a familiar permission -and name space for cpusets, with a minimum of additional kernel code. - -The cpus and mems files in the root (top_cpuset) cpuset are -read-only. 
The cpus file automatically tracks the value of -cpu_online_map using a CPU hotplug notifier, and the mems file -automatically tracks the value of node_states[N_HIGH_MEMORY]--i.e., -nodes with memory--using the cpuset_track_online_nodes() hook. - - -1.4 What are exclusive cpusets ? --------------------------------- - -If a cpuset is cpu or mem exclusive, no other cpuset, other than -a direct ancestor or descendent, may share any of the same CPUs or -Memory Nodes. - -A cpuset that is mem_exclusive *or* mem_hardwall is "hardwalled", -i.e. it restricts kernel allocations for page, buffer and other data -commonly shared by the kernel across multiple users. All cpusets, -whether hardwalled or not, restrict allocations of memory for user -space. This enables configuring a system so that several independent -jobs can share common kernel data, such as file system pages, while -isolating each job's user allocation in its own cpuset. To do this, -construct a large mem_exclusive cpuset to hold all the jobs, and -construct child, non-mem_exclusive cpusets for each individual job. -Only a small amount of typical kernel memory, such as requests from -interrupt handlers, is allowed to be taken outside even a -mem_exclusive cpuset. - - -1.5 What is memory_pressure ? ------------------------------ -The memory_pressure of a cpuset provides a simple per-cpuset metric -of the rate that the tasks in a cpuset are attempting to free up in -use memory on the nodes of the cpuset to satisfy additional memory -requests. - -This enables batch managers monitoring jobs running in dedicated -cpusets to efficiently detect what level of memory pressure that job -is causing. 
- -This is useful both on tightly managed systems running a wide mix of -submitted jobs, which may choose to terminate or re-prioritize jobs that -are trying to use more memory than allowed on the nodes assigned them, -and with tightly coupled, long running, massively parallel scientific -computing jobs that will dramatically fail to meet required performance -goals if they start to use more memory than allowed to them. - -This mechanism provides a very economical way for the batch manager -to monitor a cpuset for signs of memory pressure. It's up to the -batch manager or other user code to decide what to do about it and -take action. - -==> Unless this feature is enabled by writing "1" to the special file - /dev/cpuset/memory_pressure_enabled, the hook in the rebalance - code of __alloc_pages() for this metric reduces to simply noticing - that the cpuset_memory_pressure_enabled flag is zero. So only - systems that enable this feature will compute the metric. - -Why a per-cpuset, running average: - - Because this meter is per-cpuset, rather than per-task or mm, - the system load imposed by a batch scheduler monitoring this - metric is sharply reduced on large systems, because a scan of - the tasklist can be avoided on each set of queries. - - Because this meter is a running average, instead of an accumulating - counter, a batch scheduler can detect memory pressure with a - single read, instead of having to read and accumulate results - for a period of time. - - Because this meter is per-cpuset rather than per-task or mm, - the batch scheduler can obtain the key information, memory - pressure in a cpuset, with a single read, rather than having to - query and accumulate results over all the (dynamically changing) - set of tasks in the cpuset. - -A per-cpuset simple digital filter (requires a spinlock and 3 words -of data per-cpuset) is kept, and updated by any task attached to that -cpuset, if it enters the synchronous (direct) page reclaim code. 
- -A per-cpuset file provides an integer number representing the recent -(half-life of 10 seconds) rate of direct page reclaims caused by -the tasks in the cpuset, in units of reclaims attempted per second, -times 1000. - - -1.6 What is memory spread ? ---------------------------- -There are two boolean flag files per cpuset that control where the -kernel allocates pages for the file system buffers and related in -kernel data structures. They are called 'memory_spread_page' and -'memory_spread_slab'. - -If the per-cpuset boolean flag file 'memory_spread_page' is set, then -the kernel will spread the file system buffers (page cache) evenly -over all the nodes that the faulting task is allowed to use, instead -of preferring to put those pages on the node where the task is running. - -If the per-cpuset boolean flag file 'memory_spread_slab' is set, -then the kernel will spread some file system related slab caches, -such as for inodes and dentries evenly over all the nodes that the -faulting task is allowed to use, instead of preferring to put those -pages on the node where the task is running. - -The setting of these flags does not affect anonymous data segment or -stack segment pages of a task. - -By default, both kinds of memory spreading are off, and memory -pages are allocated on the node local to where the task is running, -except perhaps as modified by the tasks NUMA mempolicy or cpuset -configuration, so long as sufficient free memory pages are available. - -When new cpusets are created, they inherit the memory spread settings -of their parent. - -Setting memory spreading causes allocations for the affected page -or slab caches to ignore the tasks NUMA mempolicy and be spread -instead. Tasks using mbind() or set_mempolicy() calls to set NUMA -mempolicies will not notice any change in these calls as a result of -their containing tasks memory spread settings. 
If memory spreading -is turned off, then the currently specified NUMA mempolicy once again -applies to memory page allocations. - -Both 'memory_spread_page' and 'memory_spread_slab' are boolean flag -files. By default they contain "0", meaning that the feature is off -for that cpuset. If a "1" is written to that file, then that turns -the named feature on. - -The implementation is simple. - -Setting the flag 'memory_spread_page' turns on a per-process flag -PF_SPREAD_PAGE for each task that is in that cpuset or subsequently -joins that cpuset. The page allocation calls for the page cache -is modified to perform an inline check for this PF_SPREAD_PAGE task -flag, and if set, a call to a new routine cpuset_mem_spread_node() -returns the node to prefer for the allocation. - -Similarly, setting 'memory_spread_slab' turns on the flag -PF_SPREAD_SLAB, and appropriately marked slab caches will allocate -pages from the node returned by cpuset_mem_spread_node(). - -The cpuset_mem_spread_node() routine is also simple. It uses the -value of a per-task rotor cpuset_mem_spread_rotor to select the next -node in the current tasks mems_allowed to prefer for the allocation. - -This memory placement policy is also known (in other contexts) as -round-robin or interleave. - -This policy can provide substantial improvements for jobs that need -to place thread local data on the corresponding node, but that need -to access large file system data sets that need to be spread across -the several nodes in the jobs cpuset in order to fit. Without this -policy, especially for jobs that might have one thread reading in the -data set, the memory allocation across the nodes in the jobs cpuset -can become very uneven. - -1.7 What is sched_load_balance ? --------------------------------- - -The kernel scheduler (kernel/sched.c) automatically load balances -tasks. 
If one CPU is underutilized, kernel code running on that -CPU will look for tasks on other more overloaded CPUs and move those -tasks to itself, within the constraints of such placement mechanisms -as cpusets and sched_setaffinity. - -The algorithmic cost of load balancing and its impact on key shared -kernel data structures such as the task list increases more than -linearly with the number of CPUs being balanced. So the scheduler -has support to partition the systems CPUs into a number of sched -domains such that it only load balances within each sched domain. -Each sched domain covers some subset of the CPUs in the system; -no two sched domains overlap; some CPUs might not be in any sched -domain and hence won't be load balanced. - -Put simply, it costs less to balance between two smaller sched domains -than one big one, but doing so means that overloads in one of the -two domains won't be load balanced to the other one. - -By default, there is one sched domain covering all CPUs, except those -marked isolated using the kernel boot time "isolcpus=" argument. - -This default load balancing across all CPUs is not well suited for -the following two situations: - 1) On large systems, load balancing across many CPUs is expensive. - If the system is managed using cpusets to place independent jobs - on separate sets of CPUs, full load balancing is unnecessary. - 2) Systems supporting realtime on some CPUs need to minimize - system overhead on those CPUs, including avoiding task load - balancing if that is not needed. - -When the per-cpuset flag "sched_load_balance" is enabled (the default -setting), it requests that all the CPUs in that cpusets allowed 'cpus' -be contained in a single sched domain, ensuring that load balancing -can move a task (not otherwise pinned, as by sched_setaffinity) -from any CPU in that cpuset to any other. 
- -When the per-cpuset flag "sched_load_balance" is disabled, then the -scheduler will avoid load balancing across the CPUs in that cpuset, ---except-- in so far as is necessary because some overlapping cpuset -has "sched_load_balance" enabled. - -So, for example, if the top cpuset has the flag "sched_load_balance" -enabled, then the scheduler will have one sched domain covering all -CPUs, and the setting of the "sched_load_balance" flag in any other -cpusets won't matter, as we're already fully load balancing. - -Therefore in the above two situations, the top cpuset flag -"sched_load_balance" should be disabled, and only some of the smaller, -child cpusets have this flag enabled. - -When doing this, you don't usually want to leave any unpinned tasks in -the top cpuset that might use non-trivial amounts of CPU, as such tasks -may be artificially constrained to some subset of CPUs, depending on -the particulars of this flag setting in descendent cpusets. Even if -such a task could use spare CPU cycles in some other CPUs, the kernel -scheduler might not consider the possibility of load balancing that -task to that underused CPU. - -Of course, tasks pinned to a particular CPU can be left in a cpuset -that disables "sched_load_balance" as those tasks aren't going anywhere -else anyway. - -There is an impedance mismatch here, between cpusets and sched domains. -Cpusets are hierarchical and nest. Sched domains are flat; they don't -overlap and each CPU is in at most one sched domain. - -It is necessary for sched domains to be flat because load balancing -across partially overlapping sets of CPUs would risk unstable dynamics -that would be beyond our understanding. So if each of two partially -overlapping cpusets enables the flag 'sched_load_balance', then we -form a single sched domain that is a superset of both. We won't move -a task to a CPU outside its cpuset, but the scheduler load balancing -code might waste some compute cycles considering that possibility. 
- -This mismatch is why there is not a simple one-to-one relation -between which cpusets have the flag "sched_load_balance" enabled, -and the sched domain configuration. If a cpuset enables the flag, it -will get balancing across all its CPUs, but if it disables the flag, -it will only be assured of no load balancing if no other overlapping -cpuset enables the flag. - -If two cpusets have partially overlapping 'cpus' allowed, and only -one of them has this flag enabled, then the other may find its -tasks only partially load balanced, just on the overlapping CPUs. -This is just the general case of the top_cpuset example given a few -paragraphs above. In the general case, as in the top cpuset case, -don't leave tasks that might use non-trivial amounts of CPU in -such partially load balanced cpusets, as they may be artificially -constrained to some subset of the CPUs allowed to them, for lack of -load balancing to the other CPUs. - -1.7.1 sched_load_balance implementation details. ------------------------------------------------- - -The per-cpuset flag 'sched_load_balance' defaults to enabled (contrary -to most cpuset flags.) When enabled for a cpuset, the kernel will -ensure that it can load balance across all the CPUs in that cpuset -(makes sure that all the CPUs in the cpus_allowed of that cpuset are -in the same sched domain.) - -If two overlapping cpusets both have 'sched_load_balance' enabled, -then they will be (must be) both in the same sched domain. - -If, as is the default, the top cpuset has 'sched_load_balance' enabled, -then by the above that means there is a single sched domain covering -the whole system, regardless of any other cpuset settings. - -The kernel commits to user space that it will avoid load balancing -where it can. It will pick as fine a granularity partition of sched -domains as it can while still providing load balancing for any set -of CPUs allowed to a cpuset having 'sched_load_balance' enabled. 
- -The internal kernel cpuset to scheduler interface passes from the -cpuset code to the scheduler code a partition of the load balanced -CPUs in the system. This partition is a set of subsets (represented -as an array of cpumask_t) of CPUs, pairwise disjoint, that cover all -the CPUs that must be load balanced. - -Whenever the 'sched_load_balance' flag changes, or CPUs come or go -from a cpuset with this flag enabled, or a cpuset with this flag -enabled is removed, the cpuset code builds a new such partition and -passes it to the scheduler sched domain setup code, to have the sched -domains rebuilt as necessary. - -This partition exactly defines what sched domains the scheduler should -setup - one sched domain for each element (cpumask_t) in the partition. - -The scheduler remembers the currently active sched domain partitions. -When the scheduler routine partition_sched_domains() is invoked from -the cpuset code to update these sched domains, it compares the new -partition requested with the current, and updates its sched domains, -removing the old and adding the new, for each change. - - -1.8 What is sched_relax_domain_level ? --------------------------------------- - -In sched domain, the scheduler migrates tasks in 2 ways; periodic load -balance on tick, and at time of some schedule events. - -When a task is woken up, the scheduler tries to move the task to an idle CPU. -For example, if a task A running on CPU X activates another task B -on the same CPU X, and if CPU Y is X's sibling and performing idle, -then the scheduler migrates task B to CPU Y so that task B can start on -CPU Y without waiting for task A on CPU X. - -And if a CPU runs out of tasks in its runqueue, the CPU tries to pull -extra tasks from other busy CPUs to help them before it is going to -be idle. - -Of course it takes some searching cost to find movable tasks and/or -idle CPUs, the scheduler might not search all CPUs in the domain -every time. 
In fact, in some architectures, the searching ranges on -events are limited in the same socket or node where the CPU locates, -while the load balance on tick searches all. - -For example, assume CPU Z is relatively far from CPU X. Even if CPU Z -is idle while CPU X and the siblings are busy, scheduler can't migrate -woken task B from X to Z since it is out of its searching range. -As a result, task B on CPU X needs to wait for task A or wait load balance -on the next tick. For some applications in special situation, waiting -1 tick may be too long. - -The 'sched_relax_domain_level' file allows you to request changing -this searching range as you like. This file takes int value which -indicates size of searching range in levels ideally as follows, -otherwise initial value -1 that indicates the cpuset has no request. - - -1 : no request. use system default or follow request of others. - 0 : no search. - 1 : search siblings (hyperthreads in a core). - 2 : search cores in a package. - 3 : search cpus in a node [= system wide on non-NUMA system] - ( 4 : search nodes in a chunk of node [on NUMA system] ) - ( 5 : search system wide [on NUMA system] ) - -The system default is architecture dependent. The system default -can be changed using the relax_domain_level= boot parameter. - -This file is per-cpuset and affects the sched domain where the cpuset -belongs to. Therefore if the flag 'sched_load_balance' of a cpuset -is disabled, then 'sched_relax_domain_level' has no effect since -there is no sched domain belonging to the cpuset. - -If multiple cpusets are overlapping and hence they form a single sched -domain, the largest value among those is used. Be careful, if one -requests 0 and others are -1 then 0 is used. - -Note that modifying this file will have both good and bad effects, -and whether it is acceptable or not will depend on your situation. -Don't modify this file if you are not sure. 
- -If your situation is: - - The migration costs between each cpu can be assumed considerably - small(for you) due to your special application's behavior or - special hardware support for CPU cache etc. - - The searching cost doesn't have impact(for you) or you can make - the searching cost enough small by managing cpuset to compact etc. - - The latency is required even it sacrifices cache hit rate etc. -then increasing 'sched_relax_domain_level' would benefit you. - - -1.9 How do I use cpusets ? --------------------------- - -In order to minimize the impact of cpusets on critical kernel -code, such as the scheduler, and due to the fact that the kernel -does not support one task updating the memory placement of another -task directly, the impact on a task of changing its cpuset CPU -or Memory Node placement, or of changing to which cpuset a task -is attached, is subtle. - -If a cpuset has its Memory Nodes modified, then for each task attached -to that cpuset, the next time that the kernel attempts to allocate -a page of memory for that task, the kernel will notice the change -in the tasks cpuset, and update its per-task memory placement to -remain within the new cpusets memory placement. If the task was using -mempolicy MPOL_BIND, and the nodes to which it was bound overlap with -its new cpuset, then the task will continue to use whatever subset -of MPOL_BIND nodes are still allowed in the new cpuset. If the task -was using MPOL_BIND and now none of its MPOL_BIND nodes are allowed -in the new cpuset, then the task will be essentially treated as if it -was MPOL_BIND bound to the new cpuset (even though its numa placement, -as queried by get_mempolicy(), doesn't change). If a task is moved -from one cpuset to another, then the kernel will adjust the tasks -memory placement, as above, the next time that the kernel attempts -to allocate a page of memory for that task. 
- -If a cpuset has its 'cpus' modified, then each task in that cpuset -will have its allowed CPU placement changed immediately. Similarly, -if a tasks pid is written to a cpusets 'tasks' file, in either its -current cpuset or another cpuset, then its allowed CPU placement is -changed immediately. If such a task had been bound to some subset -of its cpuset using the sched_setaffinity() call, the task will be -allowed to run on any CPU allowed in its new cpuset, negating the -effect of the prior sched_setaffinity() call. - -In summary, the memory placement of a task whose cpuset is changed is -updated by the kernel, on the next allocation of a page for that task, -but the processor placement is not updated, until that tasks pid is -rewritten to the 'tasks' file of its cpuset. This is done to avoid -impacting the scheduler code in the kernel with a check for changes -in a tasks processor placement. - -Normally, once a page is allocated (given a physical page -of main memory) then that page stays on whatever node it -was allocated, so long as it remains allocated, even if the -cpusets memory placement policy 'mems' subsequently changes. -If the cpuset flag file 'memory_migrate' is set true, then when -tasks are attached to that cpuset, any pages that task had -allocated to it on nodes in its previous cpuset are migrated -to the tasks new cpuset. The relative placement of the page within -the cpuset is preserved during these migration operations if possible. -For example if the page was on the second valid node of the prior cpuset -then the page will be placed on the second valid node of the new cpuset. - -Also if 'memory_migrate' is set true, then if that cpusets -'mems' file is modified, pages allocated to tasks in that -cpuset, that were on nodes in the previous setting of 'mems', -will be moved to nodes in the new setting of 'mems.' -Pages that were not in the tasks prior cpuset, or in the cpusets -prior 'mems' setting, will not be moved. 
- -There is an exception to the above. If hotplug functionality is used -to remove all the CPUs that are currently assigned to a cpuset, -then all the tasks in that cpuset will be moved to the nearest ancestor -with non-empty cpus. But the moving of some (or all) tasks might fail if -cpuset is bound with another cgroup subsystem which has some restrictions -on task attaching. In this failing case, those tasks will stay -in the original cpuset, and the kernel will automatically update -their cpus_allowed to allow all online CPUs. When memory hotplug -functionality for removing Memory Nodes is available, a similar exception -is expected to apply there as well. In general, the kernel prefers to -violate cpuset placement, over starving a task that has had all -its allowed CPUs or Memory Nodes taken offline. - -There is a second exception to the above. GFP_ATOMIC requests are -kernel internal allocations that must be satisfied, immediately. -The kernel may drop some request, in rare cases even panic, if a -GFP_ATOMIC alloc fails. If the request cannot be satisfied within -the current tasks cpuset, then we relax the cpuset, and look for -memory anywhere we can find it. It's better to violate the cpuset -than stress the kernel. - -To start a new job that is to be contained within a cpuset, the steps are: - - 1) mkdir /dev/cpuset - 2) mount -t cgroup -ocpuset cpuset /dev/cpuset - 3) Create the new cpuset by doing mkdir's and write's (or echo's) in - the /dev/cpuset virtual file system. - 4) Start a task that will be the "founding father" of the new job. - 5) Attach that task to the new cpuset by writing its pid to the - /dev/cpuset tasks file for that cpuset. - 6) fork, exec or clone the job tasks from this founding father task. 
- -For example, the following sequence of commands will setup a cpuset -named "Charlie", containing just CPUs 2 and 3, and Memory Node 1, -and then start a subshell 'sh' in that cpuset: - - mount -t cgroup -ocpuset cpuset /dev/cpuset - cd /dev/cpuset - mkdir Charlie - cd Charlie - /bin/echo 2-3 > cpus - /bin/echo 1 > mems - /bin/echo $$ > tasks - sh - # The subshell 'sh' is now running in cpuset Charlie - # The next line should display '/Charlie' - cat /proc/self/cpuset - -In the future, a C library interface to cpusets will likely be -available. For now, the only way to query or modify cpusets is -via the cpuset file system, using the various cd, mkdir, echo, cat, -rmdir commands from the shell, or their equivalent from C. - -The sched_setaffinity calls can also be done at the shell prompt using -SGI's runon or Robert Love's taskset. The mbind and set_mempolicy -calls can be done at the shell prompt using the numactl command -(part of Andi Kleen's numa package). - -2. Usage Examples and Syntax -============================ - -2.1 Basic Usage ---------------- - -Creating, modifying, using the cpusets can be done through the cpuset -virtual filesystem. - -To mount it, type: -# mount -t cgroup -o cpuset cpuset /dev/cpuset - -Then under /dev/cpuset you can find a tree that corresponds to the -tree of the cpusets in the system. For instance, /dev/cpuset -is the cpuset that holds the whole system. - -If you want to create a new cpuset under /dev/cpuset: -# cd /dev/cpuset -# mkdir my_cpuset - -Now you want to do something with this cpuset. -# cd my_cpuset - -In this directory you can find several files: -# ls -cpu_exclusive memory_migrate mems tasks -cpus memory_pressure notify_on_release -mem_exclusive memory_spread_page sched_load_balance -mem_hardwall memory_spread_slab sched_relax_domain_level - -Reading them will give you information about the state of this cpuset: -the CPUs and Memory Nodes it can use, the processes that are using -it, its properties. 
By writing to these files you can manipulate -the cpuset. - -Set some flags: -# /bin/echo 1 > cpu_exclusive - -Add some cpus: -# /bin/echo 0-7 > cpus - -Add some mems: -# /bin/echo 0-7 > mems - -Now attach your shell to this cpuset: -# /bin/echo $$ > tasks - -You can also create cpusets inside your cpuset by using mkdir in this -directory. -# mkdir my_sub_cs - -To remove a cpuset, just use rmdir: -# rmdir my_sub_cs -This will fail if the cpuset is in use (has cpusets inside, or has -processes attached). - -Note that for legacy reasons, the "cpuset" filesystem exists as a -wrapper around the cgroup filesystem. - -The command - -mount -t cpuset X /dev/cpuset - -is equivalent to - -mount -t cgroup -ocpuset X /dev/cpuset -echo "/sbin/cpuset_release_agent" > /dev/cpuset/release_agent - -2.2 Adding/removing cpus ------------------------- - -This is the syntax to use when writing in the cpus or mems files -in cpuset directories: - -# /bin/echo 1-4 > cpus -> set cpus list to cpus 1,2,3,4 -# /bin/echo 1,2,3,4 > cpus -> set cpus list to cpus 1,2,3,4 - -2.3 Setting flags ------------------ - -The syntax is very simple: - -# /bin/echo 1 > cpu_exclusive -> set flag 'cpu_exclusive' -# /bin/echo 0 > cpu_exclusive -> unset flag 'cpu_exclusive' - -2.4 Attaching processes ------------------------ - -# /bin/echo PID > tasks - -Note that it is PID, not PIDs. You can only attach ONE task at a time. -If you have several tasks to attach, you have to do it one after another: - -# /bin/echo PID1 > tasks -# /bin/echo PID2 > tasks - ... -# /bin/echo PIDn > tasks - - -3. Questions -============ - -Q: what's up with this '/bin/echo' ? -A: bash's builtin 'echo' command does not check calls to write() against - errors. If you use it in the cpuset file system, you won't be - able to tell whether a command succeeded or failed. - -Q: When I attach processes, only the first of the line gets really attached ! -A: We can only return one error code per call to write(). 
So you should also - put only ONE pid. - -4. Contact -========== - -Web: http://www.bullopensource.org/cpuset diff --git a/Documentation/scheduler/sched-design-CFS.txt b/Documentation/scheduler/sched-design-CFS.txt index 8398ca4ff4e..6f33593e59e 100644 --- a/Documentation/scheduler/sched-design-CFS.txt +++ b/Documentation/scheduler/sched-design-CFS.txt @@ -231,7 +231,7 @@ CPU bandwidth control purposes: This options needs CONFIG_CGROUPS to be defined, and lets the administrator create arbitrary groups of tasks, using the "cgroup" pseudo filesystem. See - Documentation/cgroups.txt for more information about this filesystem. + Documentation/cgroups/cgroups.txt for more information about this filesystem. Only one of these options to group tasks can be chosen and not both. diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index dede0a2cfc4..4c5bcf6ca7e 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -9,7 +9,7 @@ * * Author: Pavel Emelianov * - * See Documentation/controllers/resource_counter.txt for more + * See Documentation/cgroups/resource_counter.txt for more * info about what this counter is. */ diff --git a/init/Kconfig b/init/Kconfig index 56fd93c63c7..2af83825634 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -323,8 +323,8 @@ config CGROUP_SCHED This option allows you to create arbitrary task groups using the "cgroup" pseudo filesystem and control the cpu bandwidth allocated to each such task group. - Refer to Documentation/cgroups.txt for more information - on "cgroup" pseudo filesystem. + Refer to Documentation/cgroups/cgroups.txt for more + information on "cgroup" pseudo filesystem. endchoice @@ -335,10 +335,9 @@ menuconfig CGROUPS use with process control subsystems such as Cpusets, CFS, memory controls or device isolation. 
See - - Documentation/cpusets.txt (Cpusets) - Documentation/scheduler/sched-design-CFS.txt (CFS) - - Documentation/cgroups/ (features for grouping, isolation) - - Documentation/controllers/ (features for resource control) + - Documentation/cgroups/ (features for grouping, isolation + and resource control) Say N if unsure. diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 647c77a88fc..a85678865c5 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -568,7 +568,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c) * load balancing domains (sched domains) as specified by that partial * partition. * - * See "What is sched_load_balance" in Documentation/cpusets.txt + * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt * for a background explanation of this. * * Does not return errors, on the theory that the callers of this -- cgit From b098161b4d0231f7dc5306111d576c0bfe0c8eba Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:51:00 -0800 Subject: idr: fix wrong kernel-doc idr_get_new_above() and ida_get_new_above() return an id in the range of @staring_id ... 0x7fffffff, not 0 ... 0x7fffffff. Signed-off-by: Li Zefan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 1c4f9281f41..6b7cfa6508f 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -292,7 +292,7 @@ static int idr_get_new_above_int(struct idr *idp, void *ptr, int starting_id) * and go back to the idr_pre_get() call. If the idr is full, it will * return -ENOSPC. * - * @id returns a value in the range 0 ... 0x7fffffff + * @id returns a value in the range @starting_id ... 0x7fffffff */ int idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) { @@ -723,7 +723,7 @@ EXPORT_SYMBOL(ida_pre_get); * and go back to the ida_pre_get() call. If the ida is full, it will * return -ENOSPC. * - * @p_id returns a value in the range 0 ... 
0x7fffffff. + * @p_id returns a value in the range @starting_id ... 0x7fffffff. */ int ida_get_new_above(struct ida *ida, int starting_id, int *p_id) { -- cgit From 65a67bd2644bef225ee318dde76016a4697218fa Mon Sep 17 00:00:00 2001 From: Marcus Meissner Date: Thu, 15 Jan 2009 13:51:00 -0800 Subject: Documentation/accounting/getdelays.c: fix endless loop When no option is passed to getdelays it just hangs, waiting for a reply which will never come. This patch prints usage() when no output marker is specified. Signed-off-by: Marcus Meissner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/accounting/getdelays.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c index cc49400b4af..7ea231172c8 100644 --- a/Documentation/accounting/getdelays.c +++ b/Documentation/accounting/getdelays.c @@ -392,6 +392,10 @@ int main(int argc, char *argv[]) goto err; } } + if (!maskset && !tid && !containerset) { + usage(); + goto err; + } do { int i; -- cgit From 3eabdb76a03bbe8f556162738c264dbfb24cff6a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: jbd: fix missing kernel-doc Fix jbd header file kernel-doc notation: Warning(linux-2.6.28-git13//include/linux/jbd.h:823): No description found for parameter 'j_average_commit_time' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 6384b19efe6..64246dce566 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -614,6 +614,8 @@ struct transaction_s * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_average_commit_time: the average amount of time in nanoseconds it + * takes to commit a transaction to 
the disk. * @j_private: An opaque pointer to fs-private information. */ -- cgit From 6ae301e85c9c58d2f430a8a7057ce488b7ff76df Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Jan 2009 13:51:01 -0800 Subject: resources: fix parameter name and kernel-doc Fix __request_region() parameter kernel-doc notation and parameter name: Warning(linux-2.6.28-git10//kernel/resource.c:627): No description found for parameter 'flags' Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ioport.h | 3 ++- kernel/resource.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/include/linux/ioport.h b/include/linux/ioport.h index f6bb2ca8e3b..32e4b2f7229 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -143,7 +143,8 @@ static inline unsigned long resource_type(struct resource *res) extern struct resource * __request_region(struct resource *, resource_size_t start, - resource_size_t n, const char *name, int relaxed); + resource_size_t n, + const char *name, int flags); /* Compatibility cruft */ #define release_region(start,n) __release_region(&ioport_resource, (start), (n)) diff --git a/kernel/resource.c b/kernel/resource.c index ca6a1536b20..fd5d7d574bb 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -620,6 +620,7 @@ resource_size_t resource_alignment(struct resource *res) * @start: resource start address * @n: resource region size * @name: reserving caller's ID string + * @flags: IO resource flags */ struct resource * __request_region(struct resource *parent, resource_size_t start, resource_size_t n, -- cgit From 71038f527f3665f562cf9afe083df729958a099b Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 15 Jan 2009 13:51:02 -0800 Subject: MAINTAINERS: add entry for freezer Now that people are using freezer for non-suspend/hibernation stuff, it should have separate maintainers entry so that it is easier to find. Signed-off-by: Pavel Machek Acked-by: Rafael J. 
Wysocki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 73548f8ad0b..3fe4dc2c256 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1821,6 +1821,14 @@ M: hch@infradead.org W: ftp://ftp.openlinux.org/pub/people/hch/vxfs S: Maintained +FREEZER +P: Pavel Machek +M: pavel@suse.cz +P: Rafael J. Wysocki +M: rjw@sisk.pl +L: linux-pm@lists.linux-foundation.org +S: Supported + FTRACE P: Steven Rostedt M: rostedt@goodmis.org -- cgit From 1bcbf31337391a2f54ef6c1e8871c2de5944a7dc Mon Sep 17 00:00:00 2001 From: Qinghuang Feng Date: Thu, 15 Jan 2009 13:51:03 -0800 Subject: btrfs & squashfs: Move btrfs and squashfsto's magic number to Use the standard magic.h for btrfs and squashfs. Signed-off-by: Qinghuang Feng Cc: Phillip Lougher Cc: Chris Mason Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/btrfs/super.c | 2 +- fs/squashfs/squashfs_fs.h | 1 - fs/squashfs/super.c | 1 + include/linux/magic.h | 2 ++ 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0a14b495532..7256cf242eb 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -51,7 +52,6 @@ #include "export.h" #include "compression.h" -#define BTRFS_SUPER_MAGIC 0x9123683E static struct super_operations btrfs_super_ops; diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 6840da1bf21..283daafc568 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -26,7 +26,6 @@ #define SQUASHFS_CACHED_FRAGMENTS CONFIG_SQUASHFS_FRAGMENT_CACHE_SIZE #define SQUASHFS_MAJOR 4 #define SQUASHFS_MINOR 0 -#define SQUASHFS_MAGIC 0x73717368 #define SQUASHFS_START 0 /* size of metadata (inode and directory) blocks */ diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index a0466d7467b..071df5b5b49 100644 --- 
a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "squashfs_fs.h" #include "squashfs_fs_sb.h" diff --git a/include/linux/magic.h b/include/linux/magic.h index 439f6f3cb0c..0b4df7eba85 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -10,11 +10,13 @@ #define SYSFS_MAGIC 0x62656572 #define SECURITYFS_MAGIC 0x73636673 #define TMPFS_MAGIC 0x01021994 +#define SQUASHFS_MAGIC 0x73717368 #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 #define XENFS_SUPER_MAGIC 0xabba1974 #define EXT4_SUPER_MAGIC 0xEF53 +#define BTRFS_SUPER_MAGIC 0x9123683E #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit From c91192d66d6cea7878b8542c9d9f1873971aba92 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 15 Jan 2009 13:51:03 -0800 Subject: nbd: do not allow two clients at the same time Two nbd-clients at same time are bad idea, and cause WARN_ON from nbd in 2.6.28-rc7 from sysfs_add_one. This simply prevents that from happening. 
To reproduce: cat /dev/zero | head -c 10000000 > /tmp/delme.fstest.fs nbd-server 9100 -l /anyone.can.connect > /tmp/delme.fstest.fs & sleep 1 nbd-client localhost 9100 /dev/nd0 & nbd-client localhost 9100 /dev/nd0 & Signed-off-by: Pavel Machek Acked-by: Paul Clements Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/nbd.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 7bcc1d8bc96..34f80fa6fed 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -406,6 +406,7 @@ static int nbd_do_it(struct nbd_device *lo) ret = sysfs_create_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); if (ret) { printk(KERN_ERR "nbd: sysfs_create_file failed!"); + lo->pid = 0; return ret; } @@ -413,6 +414,7 @@ static int nbd_do_it(struct nbd_device *lo) nbd_end_request(req); sysfs_remove_file(&disk_to_dev(lo->disk)->kobj, &pid_attr.attr); + lo->pid = 0; return 0; } @@ -648,6 +650,8 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, set_capacity(lo->disk, lo->bytesize >> 9); return 0; case NBD_DO_IT: + if (lo->pid) + return -EBUSY; if (!lo->file) return -EINVAL; thread = kthread_create(nbd_thread, lo, lo->disk->disk_name); -- cgit From a27506459c5e6ccc8437fca0adb6d3759c883c28 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:04 -0800 Subject: checkpatch: handle missing #if open in context If the #if opening statement is not in the context then the context stack can be empty. Handle this by ensuring there is always a blank entry in the stack. 
Signed-off-by: Andy Whitcroft Tested-by: Dhaval Giani Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 7bed4ed2c51..eefef65fa58 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -411,13 +411,15 @@ sub ctx_statement_block { my $type = ''; my $level = 0; - my @stack = ([$type, $level]); + my @stack = (); my $p; my $c; my $len = 0; my $remainder; while (1) { + @stack = (['', 0]) if ($#stack == -1); + #warn "CSB: blk<$blk> remain<$remain>\n"; # If we are about to drop off the end, pull in more # context. -- cgit From f9a0b3d17a01fe1ba24ce1e9c18666a52052e011 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:05 -0800 Subject: checkpatch: type/cast spacing should not check prefix spacing We should not be complaining about the prefix spacing for types and casts. We are triggering here because the check for spacing between '*'s is overly loose. Tighten this up. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index eefef65fa58..1d7924ad3a3 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -1665,7 +1665,7 @@ sub process { # Should not end with a space. $to =~ s/\s+$//; # '*'s should not have spaces between. - while ($to =~ s/(.)\s\*/$1\*/) { + while ($to =~ s/\*\s+\*/\*\*/) { } #print "from<$from> to<$to>\n"; @@ -1680,7 +1680,7 @@ sub process { # Should not end with a space. $to =~ s/\s+$//; # '*'s should not have spaces between. - while ($to =~ s/(.)\s\*/$1\*/) { + while ($to =~ s/\*\s+\*/\*\*/) { } # Modifiers should have spaces. 
$to =~ s/(\b$Modifier$)/$1 /; -- cgit From 63f17f8973ccdb8260e59ce5b1b4e2b2ee0401f0 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:06 -0800 Subject: checkpatch: allow parentheses on return handle array values When we allow return to have surrounding parentheses when containing comparison operators we are not correctly handling the case where the values contain array suffixes. Squash them. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 1d7924ad3a3..696196e194f 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2016,7 +2016,11 @@ sub process { # Flatten any parentheses $value =~ s/\)\(/\) \(/g; - while ($value !~ /(?:$Ident|-?$Constant)\s*$Compare\s*(?:$Ident|-?$Constant)/ && $value =~ s/\([^\(\)]*\)/1/) { + while ($value =~ s/\[[^\{\}]*\]/1/ || + $value !~ /(?:$Ident|-?$Constant)\s* + $Compare\s* + (?:$Ident|-?$Constant)/x && + $value =~ s/\([^\(\)]*\)/1/) { } if ($value =~ /^(?:$Ident|-?$Constant)$/) { -- cgit From 39667782362becd5527e48d6c976a9f9985b95e6 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:06 -0800 Subject: checkpatch: if should not continue a preceeding brace We should not be continuing a braced section with an if, for example: if (...) { } if (...) { } Detect this and suggest adding a newline. Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 696196e194f..5ea55e330c2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2108,6 +2108,11 @@ sub process { ERROR("trailing statements should be on next line\n" . 
$herecurr); } } +# if should not continue a brace + if ($line =~ /}\s*if\b/) { + ERROR("trailing statements should be on next line\n" . + $herecurr); + } # case and default should not have general statements after them if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g && $line !~ /\G(?: -- cgit From 6903ffb2257266472ef2edd0092d526ae2dc00f7 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:07 -0800 Subject: checkpatch: struct seq_operations should normally be const In the general use case struct seq_operations should be a const object. Check for and warn where it is not. Cc: Ingo Molnar Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5ea55e330c2..447435c33dd 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -2527,9 +2527,10 @@ sub process { WARN("please use device_initcall() instead of __initcall()\n" . $herecurr); } # check for struct file_operations, ensure they are const. - if ($line =~ /\bstruct\s+file_operations\b/ && - $line !~ /\bconst\b/) { - WARN("struct file_operations should normally be const\n" . $herecurr); + if ($line !~ /\bconst\b/ && + $line =~ /\bstruct\s+(file_operations|seq_operations)\b/) { + WARN("struct $1 should normally be const\n" . 
+ $herecurr); } # use of NR_CPUS is usually wrong -- cgit From db92a6502d4e8cb885e85e862b24ba5c07036fbf Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Thu, 15 Jan 2009 13:51:07 -0800 Subject: checkpatch: version: 0.27 Signed-off-by: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 447435c33dd..45eb0ae98eb 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -10,7 +10,7 @@ use strict; my $P = $0; $P =~ s@.*/@@g; -my $V = '0.26'; +my $V = '0.27'; use Getopt::Long qw(:config no_auto_abbrev); -- cgit From c3d6362b8717759de7f2086f9665a4d96cacbc51 Mon Sep 17 00:00:00 2001 From: Alex Murray Date: Thu, 15 Jan 2009 13:51:08 -0800 Subject: hwmon: applesmc: fix light sensor readings on newer MacBooks The light sensors ALV0 and ALV1 on newer MacBooks (early 2008 and later) changed to report 10 bytes instead of the earlier 6, and the sensor encoding subsequently changed. As a result, the reported light sensor readings are much too low. Via experiments leading up to this patch, it seems only the ALV0 is reporting data, and the most useful value therein is a 10-bit big-endian value at offset 6. This suggests that a new protocol was added as a backward-compatible replacement on top of the old one. This patch makes applesmc report the improved light sensor reading for the new machines, on a scale in conformance with earlier ones. 
Signed-off-by: Alex Murray Signed-off-by: Henrik Rydberg Cc: Nicolas Boichat Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/applesmc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/hwmon/applesmc.c b/drivers/hwmon/applesmc.c index dca47a591ba..e3018623658 100644 --- a/drivers/hwmon/applesmc.c +++ b/drivers/hwmon/applesmc.c @@ -590,6 +590,11 @@ static ssize_t applesmc_light_show(struct device *dev, } ret = applesmc_read_key(LIGHT_SENSOR_LEFT_KEY, buffer, data_length); + /* newer macbooks report a single 10-bit bigendian value */ + if (data_length == 10) { + left = be16_to_cpu(*(__be16 *)(buffer + 6)) >> 2; + goto out; + } left = buffer[2]; if (ret) goto out; -- cgit From bd112db872c2f69993c86f458467acb4a14da010 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 15 Jan 2009 13:51:11 -0800 Subject: memcg: fix mem_cgroup_get_reclaim_stat_from_page In case of swapin, a new page is added to lru before it is charged, so page->pc->mem_cgroup points to NULL or last mem_cgroup the page was charged before. In the latter case, if the mem_cgroup has already freed by rmdir, the area pointed to by page->pc->mem_cgroup may have invalid data. Actually, I saw general protection fault. 
general protection fault: 0000 [#1] SMP last sysfs file: /sys/devices/system/cpu/cpu15/cache/index1/shared_cpu_map CPU 4 Modules linked in: ipt_REJECT xt_tcpudp iptable_filter ip_tables x_tables bridge stp ipv6 autofs4 hidp rfcomm l2cap bluetooth sunrpc dm_mirror dm_region_hash dm_log dm_multipath dm_mod rfkill input_polldev sbs sbshc battery ac lp sg ide_cd_mod cdrom button serio_raw acpi_memhotplug parport_pc e1000 rtc_cmos parport rtc_core rtc_lib i2c_i801 i2c_core shpchp pcspkr ata_piix libata megaraid_mbox megaraid_mm sd_mod scsi_mod ext3 jbd ehci_hcd ohci_hcd uhci_hcd [last unloaded: microcode] Pid: 26038, comm: page01 Tainted: G W 2.6.28-rc9-mm1-mmotm-2008-12-22-16-14-f2ab3dea #1 RIP: 0010:[] [] update_page_reclaim_stat+0x2f/0x42 RSP: 0000:ffff8801ee457da8 EFLAGS: 00010002 RAX: 32353438312021c8 RBX: 0000000000000000 RCX: 32353438312021c8 RDX: 0000000000000000 RSI: ffff8800cb0b1000 RDI: ffff8801164d1d28 RBP: ffff880110002cb8 R08: ffff88010f2eae23 R09: 0000000000000001 R10: ffff8800bc514b00 R11: ffff880110002c00 R12: 0000000000000000 R13: ffff88000f484100 R14: 0000000000000003 R15: 00000000001200d2 FS: 00007f8a261726f0(0000) GS:ffff88010f2eaa80(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f8a25d22000 CR3: 00000001ef18c000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process page01 (pid: 26038, threadinfo ffff8801ee456000, task ffff8800b585b960) Stack: ffffe200071ee568 ffff880110001f00 0000000000000000 ffffffff8028ea17 ffff88000f484100 0000000000000000 0000000000000020 00007f8a25d22000 ffff8800bc514b00 ffffffff8028ec34 0000000000000000 0000000000016fd8 Call Trace: [] ? ____pagevec_lru_add+0xc1/0x13c [] ? drain_cpu_pagevecs+0x36/0x89 [] ? swapin_readahead+0x78/0x98 [] ? handle_mm_fault+0x3d9/0x741 [] ? do_page_fault+0x3ce/0x78c [] ? trace_hardirqs_off_thunk+0x3a/0x3c [] ? 
page_fault+0x1f/0x30 Code: cc 55 48 8d af b8 0d 00 00 48 89 f7 53 89 d3 e8 39 85 02 00 48 63 d3 48 ff 44 d5 10 45 85 e4 74 05 48 ff 44 d5 00 48 85 c0 74 0e <48> ff 44 d0 10 45 85 e4 74 04 48 ff 04 d0 5b 5d 41 5c c3 41 54 RIP [] update_page_reclaim_stat+0x2f/0x42 RSP Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Pavel Emelyanov Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e2996b80601..b6651277116 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -358,6 +358,10 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) return; pc = lookup_page_cgroup(page); + /* + * Used bit is set without atomic ops but after smp_wmb(). + * For making pc->mem_cgroup visible, insert smp_rmb() here. + */ smp_rmb(); /* unused page is not rotated. */ if (!PageCgroupUsed(pc)) @@ -374,7 +378,10 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) if (mem_cgroup_disabled()) return; pc = lookup_page_cgroup(page); - /* barrier to sync with "charge" */ + /* + * Used bit is set without atomic ops but after smp_wmb(). + * For making pc->mem_cgroup visible, insert smp_rmb() here. + */ smp_rmb(); if (!PageCgroupUsed(pc)) return; @@ -559,6 +566,14 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) return NULL; pc = lookup_page_cgroup(page); + /* + * Used bit is set without atomic ops but after smp_wmb(). + * For making pc->mem_cgroup visible, insert smp_rmb() here. + */ + smp_rmb(); + if (!PageCgroupUsed(pc)) + return NULL; + mz = page_cgroup_zoneinfo(pc); if (!mz) return NULL; -- cgit From 40d58138f832a48208cdce57d6572a033b1f7a23 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 15 Jan 2009 13:51:12 -0800 Subject: memcg: fix error path of mem_cgroup_move_parent There is a bug in error path of mem_cgroup_move_parent. 
Extra refcnt got from try_charge should be dropped, and usages incremented by try_charge should be decremented in both error paths: A: failure at get_page_unless_zero B: failure at isolate_lru_page This bug makes this parent directory unremovable. In case of A, rmdir doesn't return, because res.usage doesn't go down to 0 at mem_cgroup_force_empty even after all the pc in lru are removed. In case of B, rmdir fails and returns -EBUSY, because it has extra ref counts even after res.usage goes down to 0. Signed-off-by: Daisuke Nishimura Acked-by: KAMEZAWA Hiroyuki Acked-by: Balbir Singh Cc: Pavel Emelyanov Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index b6651277116..7be9b35d7ff 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -994,14 +994,15 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, if (pc->mem_cgroup != from) goto out; - css_put(&from->css); res_counter_uncharge(&from->res, PAGE_SIZE); mem_cgroup_charge_statistics(from, pc, false); if (do_swap_account) res_counter_uncharge(&from->memsw, PAGE_SIZE); + css_put(&from->css); + + css_get(&to->css); pc->mem_cgroup = to; mem_cgroup_charge_statistics(to, pc, true); - css_get(&to->css); ret = 0; out: unlock_page_cgroup(pc); @@ -1034,8 +1035,10 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, if (ret || !parent) return ret; - if (!get_page_unless_zero(page)) - return -EBUSY; + if (!get_page_unless_zero(page)) { + ret = -EBUSY; + goto uncharge; + } ret = isolate_lru_page(page); @@ -1044,19 +1047,23 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, ret = mem_cgroup_move_account(pc, child, parent); - /* drop extra refcnt by try_charge() (move_account increment one) */ - css_put(&parent->css); putback_lru_page(page); if (!ret) { put_page(page); + /* drop extra refcnt by try_charge() */ + 
css_put(&parent->css); return 0; } - /* uncharge if move fails */ + cancel: + put_page(page); +uncharge: + /* drop extra refcnt by try_charge() */ + css_put(&parent->css); + /* uncharge if move fails */ res_counter_uncharge(&parent->res, PAGE_SIZE); if (do_swap_account) res_counter_uncharge(&parent->memsw, PAGE_SIZE); - put_page(page); return ret; } -- cgit From c268e9946d7dc30ac4e55cdc3f43c8af1ae8153c Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 15 Jan 2009 13:51:13 -0800 Subject: memcg: fix hierarchical reclaim If root_mem has no children, last_scaned_child is set to root_mem itself. But after some children added to root_mem, mem_cgroup_get_next_node can mem_cgroup_put the root_mem although root_mem has not been mem_cgroup_get. This patch fixes this behavior by: - Set last_scanned_child to NULL if root_mem has no children or DFS search has returned to root_mem itself(root_mem is not a "child" of root_mem). Make mem_cgroup_get_first_node return root_mem in this case. There are no mem_cgroup_get/put for root_mem. - Rename mem_cgroup_get_next_node to __mem_cgroup_get_next_node, and mem_cgroup_get_first_node to mem_cgroup_get_next_node. Make mem_cgroup_hierarchical_reclaim call only new mem_cgroup_get_next_node. 
Signed-off-by: Daisuke Nishimura Cc: Balbir Singh Cc: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 68 ++++++++++++++++++++++++++++++--------------------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7be9b35d7ff..322625f551c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -633,7 +633,7 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, * called with hierarchy_mutex held */ static struct mem_cgroup * -mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) +__mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) { struct cgroup *cgroup, *curr_cgroup, *root_cgroup; @@ -644,19 +644,16 @@ mem_cgroup_get_next_node(struct mem_cgroup *curr, struct mem_cgroup *root_mem) /* * Walk down to children */ - mem_cgroup_put(curr); cgroup = list_entry(curr_cgroup->children.next, struct cgroup, sibling); curr = mem_cgroup_from_cont(cgroup); - mem_cgroup_get(curr); goto done; } visit_parent: if (curr_cgroup == root_cgroup) { - mem_cgroup_put(curr); - curr = root_mem; - mem_cgroup_get(curr); + /* caller handles NULL case */ + curr = NULL; goto done; } @@ -664,11 +661,9 @@ visit_parent: * Goto next sibling */ if (curr_cgroup->sibling.next != &curr_cgroup->parent->children) { - mem_cgroup_put(curr); cgroup = list_entry(curr_cgroup->sibling.next, struct cgroup, sibling); curr = mem_cgroup_from_cont(cgroup); - mem_cgroup_get(curr); goto done; } @@ -679,7 +674,6 @@ visit_parent: goto visit_parent; done: - root_mem->last_scanned_child = curr; return curr; } @@ -689,40 +683,46 @@ done: * that to reclaim free pages from. 
*/ static struct mem_cgroup * -mem_cgroup_get_first_node(struct mem_cgroup *root_mem) +mem_cgroup_get_next_node(struct mem_cgroup *root_mem) { struct cgroup *cgroup; - struct mem_cgroup *ret; + struct mem_cgroup *orig, *next; bool obsolete; - obsolete = mem_cgroup_is_obsolete(root_mem->last_scanned_child); - /* * Scan all children under the mem_cgroup mem */ mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); + + orig = root_mem->last_scanned_child; + obsolete = mem_cgroup_is_obsolete(orig); + if (list_empty(&root_mem->css.cgroup->children)) { - ret = root_mem; + /* + * root_mem might have children before and last_scanned_child + * may point to one of them. We put it later. + */ + if (orig) + VM_BUG_ON(!obsolete); + next = NULL; goto done; } - if (!root_mem->last_scanned_child || obsolete) { - - if (obsolete && root_mem->last_scanned_child) - mem_cgroup_put(root_mem->last_scanned_child); - + if (!orig || obsolete) { cgroup = list_first_entry(&root_mem->css.cgroup->children, struct cgroup, sibling); - ret = mem_cgroup_from_cont(cgroup); - mem_cgroup_get(ret); + next = mem_cgroup_from_cont(cgroup); } else - ret = mem_cgroup_get_next_node(root_mem->last_scanned_child, - root_mem); + next = __mem_cgroup_get_next_node(orig, root_mem); done: - root_mem->last_scanned_child = ret; + if (next) + mem_cgroup_get(next); + root_mem->last_scanned_child = next; + if (orig) + mem_cgroup_put(orig); mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); - return ret; + return (next) ? 
next : root_mem; } static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) @@ -780,21 +780,18 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, if (!root_mem->use_hierarchy) return ret; - next_mem = mem_cgroup_get_first_node(root_mem); + next_mem = mem_cgroup_get_next_node(root_mem); while (next_mem != root_mem) { if (mem_cgroup_is_obsolete(next_mem)) { - mem_cgroup_put(next_mem); - next_mem = mem_cgroup_get_first_node(root_mem); + next_mem = mem_cgroup_get_next_node(root_mem); continue; } ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, get_swappiness(next_mem)); if (mem_cgroup_check_under_limit(root_mem)) return 0; - mutex_lock(&mem_cgroup_subsys.hierarchy_mutex); - next_mem = mem_cgroup_get_next_node(next_mem, root_mem); - mutex_unlock(&mem_cgroup_subsys.hierarchy_mutex); + next_mem = mem_cgroup_get_next_node(root_mem); } return ret; } @@ -2254,7 +2251,14 @@ static void mem_cgroup_pre_destroy(struct cgroup_subsys *ss, static void mem_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cont) { - mem_cgroup_put(mem_cgroup_from_cont(cont)); + struct mem_cgroup *mem = mem_cgroup_from_cont(cont); + struct mem_cgroup *last_scanned_child = mem->last_scanned_child; + + if (last_scanned_child) { + VM_BUG_ON(!mem_cgroup_is_obsolete(last_scanned_child)); + mem_cgroup_put(last_scanned_child); + } + mem_cgroup_put(mem); } static int mem_cgroup_populate(struct cgroup_subsys *ss, -- cgit From 4d1c627389c8ba6d9e703208567ffcdbd356f682 Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 15 Jan 2009 13:51:14 -0800 Subject: memcg: make oom less frequently In previous implementation, mem_cgroup_try_charge checked the return value of mem_cgroup_try_to_free_pages, and just retried if some pages had been reclaimed. But now, try_charge(and mem_cgroup_hierarchical_reclaim called from it) only checks whether the usage is less than the limit. This patch tries to change the behavior as before to cause oom less frequently. 
Signed-off-by: Daisuke Nishimura Acked-by: Balbir Singh Acked-by: KAMEZAWA Hiroyuki Cc: Pavel Emelyanov Cc: Li Zefan Cc: Paul Menage Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 322625f551c..fb62b4335fa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -773,10 +773,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, * but there might be left over accounting, even after children * have left. */ - ret = try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, + ret += try_to_free_mem_cgroup_pages(root_mem, gfp_mask, noswap, get_swappiness(root_mem)); if (mem_cgroup_check_under_limit(root_mem)) - return 0; + return 1; /* indicate reclaim has succeeded */ if (!root_mem->use_hierarchy) return ret; @@ -787,10 +787,10 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, next_mem = mem_cgroup_get_next_node(root_mem); continue; } - ret = try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, + ret += try_to_free_mem_cgroup_pages(next_mem, gfp_mask, noswap, get_swappiness(next_mem)); if (mem_cgroup_check_under_limit(root_mem)) - return 0; + return 1; /* indicate reclaim has succeeded */ next_mem = mem_cgroup_get_next_node(root_mem); } return ret; @@ -875,6 +875,8 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, gfp_mask, noswap); + if (ret) + continue; /* * try_to_free_mem_cgroup_pages() might not give us a full -- cgit From 46666d8ac42893f90edde7e57a11bc8749d7e89c Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 15 Jan 2009 13:51:15 -0800 Subject: revert "mm: vmalloc use mutex for purge" Revert commit e97a630eb0f5b8b380fd67504de6cedebb489003 ("mm: vmalloc use mutex for purge") Bryan Donlan reports: : After testing 2.6.29-rc1 on xen-x86 with a btrfs root filesystem, I : got the OOPS quoted below and a 
hard freeze shortly after boot. : Boot messages and config are attached. : : ------------[ cut here ]------------ : Kernel BUG at c05ef80d [verbose debug info unavailable] : invalid opcode: 0000 [#1] SMP : last sysfs file: /sys/block/xvdc/size : Modules linked in: : : Pid: 0, comm: swapper Not tainted (2.6.29-rc1 #6) : EIP: 0061:[] EFLAGS: 00010087 CPU: 2 : EIP is at schedule+0x7cd/0x950 : EAX: d5aeca80 EBX: 00000002 ECX: 00000000 EDX: d4cb9a40 : ESI: c12f5600 EDI: d4cb9a40 EBP: d6033fa4 ESP: d6033ef4 : DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0069 : Process swapper (pid: 0, ti=d6032000 task=d6020b70 task.ti=d6032000) : Stack: : 000d85bc 00000000 000186a0 00000000 0dd11410 c0105417 c12efe00 0dc367c3 : 00000011 c0105d46 d5a5d310 deadbeef d4cb9a40 c07cc600 c05f1340 c12e0060 : deadbeef d6020b70 d6020d08 00000002 c014377d 00000000 c12f5600 00002c22 : Call Trace: : [] xen_force_evtchn_callback+0x17/0x30 : [] check_events+0x8/0x12 : [] _spin_unlock_irqrestore+0x20/0x40 : [] hrtimer_start_range_ns+0x12d/0x2e0 : [] tick_nohz_restart_sched_tick+0x146/0x160 : [] cpu_idle+0xa5/0xc0 and bisected it to this commit. Let's remove it now while we have a think about the problem. 
Reported-by: Bryan Donlan Tested-by: Christophe Saout Cc: Nick Piggin Cc: Ingo Molnar Cc: Jeremy Fitzhardinge Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmalloc.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7e00b280648..75f49d312e8 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -496,7 +495,7 @@ static atomic_t vmap_lazy_nr = ATOMIC_INIT(0); static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, int sync, int force_flush) { - static DEFINE_MUTEX(purge_lock); + static DEFINE_SPINLOCK(purge_lock); LIST_HEAD(valist); struct vmap_area *va; int nr = 0; @@ -507,10 +506,10 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, * the case that isn't actually used at the moment anyway. */ if (!sync && !force_flush) { - if (!mutex_trylock(&purge_lock)) + if (!spin_trylock(&purge_lock)) return; } else - mutex_lock(&purge_lock); + spin_lock(&purge_lock); rcu_read_lock(); list_for_each_entry_rcu(va, &vmap_area_list, list) { @@ -542,7 +541,7 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, __free_vmap_area(va); spin_unlock(&vmap_area_lock); } - mutex_unlock(&purge_lock); + spin_unlock(&purge_lock); } /* -- cgit From 70b66cbfd3316b792a855cb9a2574e85f1a63d0f Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:51:17 -0800 Subject: alpha: nautilus - fix compile failure with gcc-4.3 init_srm_irq() deals with irq's #16 and above, but size of irq_desc array on nautilus and some other system types is 16. So gcc-4.3 complains that "array subscript is above array bounds", even though this function is never called on those systems. This adds a check for NR_IRQS <= 16, which effectively optimizes init_srm_irq() code away on problematic platforms. Thanks to Daniel Drake for detailed analysis of the problem. 
Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/irq_srm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/alpha/kernel/irq_srm.c b/arch/alpha/kernel/irq_srm.c index 32212014fbe..a03fbca4940 100644 --- a/arch/alpha/kernel/irq_srm.c +++ b/arch/alpha/kernel/irq_srm.c @@ -63,6 +63,8 @@ init_srm_irqs(long max, unsigned long ignore_mask) { long i; + if (NR_IRQS <= 16) + return; for (i = 16; i < max; ++i) { if (i < 64 && ((ignore_mask >> i) & 1)) continue; -- cgit From 2f88d151cb8e73587983d7feccd70672ff6730fe Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:51:18 -0800 Subject: alpha: nautilus - fix hang on boot Recently introduced generic pci_common_swizzle() relies on bus->self being NULL for the root PCI bus. But on nautilus bus->self points to the host bridge device, which is necessary as we do a root bus sizing on this system. As a result, pci_common_swizzle() loops infinitely. This worked until 2.6.29-rc1 because the alpha-specific swizzle routine checked for bus->parent == NULL (instead of bus->self). Fixed by clearing bus->self after bus sizing is done. Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/sys_nautilus.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index a7f23b5ab81..99c0f46f6b9 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -245,6 +245,10 @@ nautilus_init_pci(void) IRONGATE0->pci_mem = pci_mem; pci_bus_assign_resources(bus); + + /* pci_common_swizzle() relies on bus->self being NULL + for the root bus, so just clear it. 
*/ + bus->self = NULL; pci_fixup_irqs(alpha_mv.pci_swizzle, alpha_mv.pci_map_irq); } -- cgit From 5f7dc5d75076fd1c1fc6bc09f2467509d20db24a Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:51:19 -0800 Subject: alpha: fix RTC on marvel Unlike other alphas, marvel doesn't have real PC-style CMOS clock hardware - RTC accesses are emulated via PAL calls. Unfortunately, for unknown reason these calls work only on CPU #0. So current implementation for arbitrary CPU makes CMOS_READ/WRITE to be executed on CPU #0 via IPI. However, for obvious reason this doesn't work with standard get/set_rtc_time() functions, where a bunch of CMOS accesses is done with disabled interrupts. Solved by making the IPI calls for entire get/set_rtc_time() functions, not for individual CMOS accesses. Which is also a lot more effective performance-wise. The patch is largely based on the code from Jay Estabrook. My changes: - tweak asm-generic/rtc.h by adding a couple of #defines to avoid a massive code duplication in arch/alpha/include/asm/rtc.h; - sys_marvel.c: fix get/set_rtc_time() return values (Jay's FIXMEs). NOTE: this fixes *only* LIB_RTC drivers. Legacy (CONFIG_RTC) driver won't work on marvel. Actually I think that we should just disable CONFIG_RTC on alpha (maybe in 2.6.30?), like most other arches - AFAIK, all modern distributions use LIB_RTC anyway. 
Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/machvec.h | 4 +++ arch/alpha/include/asm/rtc.h | 12 ++++++--- arch/alpha/kernel/core_marvel.c | 10 +------ arch/alpha/kernel/machvec_impl.h | 5 +++- arch/alpha/kernel/proto.h | 2 ++ arch/alpha/kernel/sys_jensen.c | 2 ++ arch/alpha/kernel/sys_marvel.c | 56 +++++++++++++++++++++++++++++++++++++++- arch/alpha/kernel/time.c | 10 +++++++ include/asm-generic/rtc.h | 14 +++++++--- 9 files changed, 98 insertions(+), 17 deletions(-) diff --git a/arch/alpha/include/asm/machvec.h b/arch/alpha/include/asm/machvec.h index a86c083cdf7..fea4ea75b79 100644 --- a/arch/alpha/include/asm/machvec.h +++ b/arch/alpha/include/asm/machvec.h @@ -21,6 +21,7 @@ struct pci_dev; struct pci_ops; struct pci_controller; struct _alpha_agp_info; +struct rtc_time; struct alpha_machine_vector { @@ -94,6 +95,9 @@ struct alpha_machine_vector struct _alpha_agp_info *(*agp_info)(void); + unsigned int (*rtc_get_time)(struct rtc_time *); + int (*rtc_set_time)(struct rtc_time *); + const char *vector_name; /* NUMA information */ diff --git a/arch/alpha/include/asm/rtc.h b/arch/alpha/include/asm/rtc.h index 4e854b1333e..1f7fba671ae 100644 --- a/arch/alpha/include/asm/rtc.h +++ b/arch/alpha/include/asm/rtc.h @@ -1,9 +1,15 @@ #ifndef _ALPHA_RTC_H #define _ALPHA_RTC_H -/* - * Alpha uses the default access methods for the RTC. 
- */ +#if defined(CONFIG_ALPHA_GENERIC) +# define get_rtc_time alpha_mv.rtc_get_time +# define set_rtc_time alpha_mv.rtc_set_time +#else +# if defined(CONFIG_ALPHA_MARVEL) && defined(CONFIG_SMP) +# define get_rtc_time marvel_get_rtc_time +# define set_rtc_time marvel_set_rtc_time +# endif +#endif #include diff --git a/arch/alpha/kernel/core_marvel.c b/arch/alpha/kernel/core_marvel.c index 9cd8dca742a..e302daecbe5 100644 --- a/arch/alpha/kernel/core_marvel.c +++ b/arch/alpha/kernel/core_marvel.c @@ -658,16 +658,8 @@ __marvel_rtc_io(u8 b, unsigned long addr, int write) rtc_access.data = bcd2bin(b); rtc_access.function = 0x48 + !write; /* GET/PUT_TOY */ -#ifdef CONFIG_SMP - if (smp_processor_id() != boot_cpuid) - smp_call_function_single(boot_cpuid, - __marvel_access_rtc, - &rtc_access, 1); - else - __marvel_access_rtc(&rtc_access); -#else __marvel_access_rtc(&rtc_access); -#endif + ret = bin2bcd(rtc_access.data); break; diff --git a/arch/alpha/kernel/machvec_impl.h b/arch/alpha/kernel/machvec_impl.h index 466c9dff818..512685f7809 100644 --- a/arch/alpha/kernel/machvec_impl.h +++ b/arch/alpha/kernel/machvec_impl.h @@ -40,7 +40,10 @@ #define CAT1(x,y) x##y #define CAT(x,y) CAT1(x,y) -#define DO_DEFAULT_RTC .rtc_port = 0x70 +#define DO_DEFAULT_RTC \ + .rtc_port = 0x70, \ + .rtc_get_time = common_get_rtc_time, \ + .rtc_set_time = common_set_rtc_time #define DO_EV4_MMU \ .max_asn = EV4_MAX_ASN, \ diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h index 708d5ca8778..fe14c6747cd 100644 --- a/arch/alpha/kernel/proto.h +++ b/arch/alpha/kernel/proto.h @@ -145,6 +145,8 @@ extern void smp_percpu_timer_interrupt(struct pt_regs *); extern irqreturn_t timer_interrupt(int irq, void *dev); extern void common_init_rtc(void); extern unsigned long est_cycle_freq; +extern unsigned int common_get_rtc_time(struct rtc_time *time); +extern int common_set_rtc_time(struct rtc_time *time); /* smc37c93x.c */ extern void SMC93x_Init(void); diff --git 
a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index 2c3de97de46..e2516f9a896 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -261,6 +261,8 @@ struct alpha_machine_vector jensen_mv __initmv = { .machine_check = jensen_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, .rtc_port = 0x170, + .rtc_get_time = common_get_rtc_time, + .rtc_set_time = common_set_rtc_time, .nr_irqs = 16, .device_interrupt = jensen_device_interrupt, diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 828449cd263..c5a1a2438c6 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "proto.h" #include "err_impl.h" @@ -426,6 +427,57 @@ marvel_init_rtc(void) init_rtc_irq(); } +struct marvel_rtc_time { + struct rtc_time *time; + int retval; +}; + +#ifdef CONFIG_SMP +static void +smp_get_rtc_time(void *data) +{ + struct marvel_rtc_time *mrt = data; + mrt->retval = __get_rtc_time(mrt->time); +} + +static void +smp_set_rtc_time(void *data) +{ + struct marvel_rtc_time *mrt = data; + mrt->retval = __set_rtc_time(mrt->time); +} +#endif + +static unsigned int +marvel_get_rtc_time(struct rtc_time *time) +{ +#ifdef CONFIG_SMP + struct marvel_rtc_time mrt; + + if (smp_processor_id() != boot_cpuid) { + mrt.time = time; + smp_call_function_single(boot_cpuid, smp_get_rtc_time, &mrt, 1); + return mrt.retval; + } +#endif + return __get_rtc_time(time); +} + +static int +marvel_set_rtc_time(struct rtc_time *time) +{ +#ifdef CONFIG_SMP + struct marvel_rtc_time mrt; + + if (smp_processor_id() != boot_cpuid) { + mrt.time = time; + smp_call_function_single(boot_cpuid, smp_set_rtc_time, &mrt, 1); + return mrt.retval; + } +#endif + return __set_rtc_time(time); +} + static void marvel_smp_callin(void) { @@ -466,7 +518,9 @@ marvel_smp_callin(void) struct alpha_machine_vector marvel_ev7_mv __initmv = { .vector_name = "MARVEL/EV7", 
DO_EV7_MMU, - DO_DEFAULT_RTC, + .rtc_port = 0x70, + .rtc_get_time = marvel_get_rtc_time, + .rtc_set_time = marvel_set_rtc_time, DO_MARVEL_IO, .machine_check = marvel_machine_check, .max_isa_dma_address = ALPHA_MAX_ISA_DMA_ADDRESS, diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index e6a231435cb..b04e2cbf23a 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -180,6 +181,15 @@ common_init_rtc(void) init_rtc_irq(); } +unsigned int common_get_rtc_time(struct rtc_time *time) +{ + return __get_rtc_time(time); +} + +int common_set_rtc_time(struct rtc_time *time) +{ + return __set_rtc_time(time); +} /* Validate a computed cycle counter result against the known bounds for the given processor core. There's too much brokenness in the way of diff --git a/include/asm-generic/rtc.h b/include/asm-generic/rtc.h index 89061c1a67d..763e3b060f4 100644 --- a/include/asm-generic/rtc.h +++ b/include/asm-generic/rtc.h @@ -42,7 +42,7 @@ static inline unsigned char rtc_is_updating(void) return uip; } -static inline unsigned int get_rtc_time(struct rtc_time *time) +static inline unsigned int __get_rtc_time(struct rtc_time *time) { unsigned char ctrl; unsigned long flags; @@ -108,8 +108,12 @@ static inline unsigned int get_rtc_time(struct rtc_time *time) return RTC_24H; } +#ifndef get_rtc_time +#define get_rtc_time __get_rtc_time +#endif + /* Set the current date and time in the real time clock. 
*/ -static inline int set_rtc_time(struct rtc_time *time) +static inline int __set_rtc_time(struct rtc_time *time) { unsigned long flags; unsigned char mon, day, hrs, min, sec; @@ -190,11 +194,15 @@ static inline int set_rtc_time(struct rtc_time *time) return 0; } +#ifndef set_rtc_time +#define set_rtc_time __set_rtc_time +#endif + static inline unsigned int get_rtc_ss(void) { struct rtc_time h; - get_rtc_time(&h); + __get_rtc_time(&h); return h.tm_sec; } -- cgit From 6946ce00c944131464d408c1e9b7b290d8bc3d15 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:51:20 -0800 Subject: alpha: .gitignore vmlinux.lds Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/.gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 arch/alpha/kernel/.gitignore diff --git a/arch/alpha/kernel/.gitignore b/arch/alpha/kernel/.gitignore new file mode 100644 index 00000000000..c5f676c3c22 --- /dev/null +++ b/arch/alpha/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds -- cgit From 5da7f3d71e243ef5c464967581414d29c72bab75 Mon Sep 17 00:00:00 2001 From: Ivan Kokshaysky Date: Thu, 15 Jan 2009 13:51:20 -0800 Subject: alpha: make pte_alloc_one_kernel() inline As it's just a single call to __get_free_page(). 
Signed-off-by: Ivan Kokshaysky Cc: Richard Henderson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/include/asm/pgalloc.h | 7 ++++++- arch/alpha/mm/init.c | 7 ------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index fd090155dcc..bc2a0daf2d9 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -50,7 +50,12 @@ pmd_free(struct mm_struct *mm, pmd_t *pmd) free_page((unsigned long)pmd); } -extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long addr); +static inline pte_t * +pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + return pte; +} static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 234e42b8ee7..5d7a16eab31 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -59,13 +59,6 @@ pgd_alloc(struct mm_struct *mm) return ret; } -pte_t * -pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) -{ - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); - return pte; -} - /* * BAD_PAGE is the page that is used for page faults when linux -- cgit From 5b019e99016f3a692ba45bf68fba73a402d7c01a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 15 Jan 2009 13:51:21 -0800 Subject: lib/idr.c: use kmem_cache_zalloc() for the idr_layer cache David points out that the idr_remove_all() function returns unused slabs to the kmem cache, but needs to zero them first or else they will be uninitialized upon next use. This causes crashes which have been observed in the firewire subsystem. He fixed this by zeroing the object before freeing it in idr_remove_all(). 
But we agree that simply removing the constructor and zeroing the object at allocation time is simpler than relying upon slab constructor machinery and might even be faster. This problem was introduced by "idr: make idr_remove rcu-safe" (commit cf481c20c476ad2c0febdace9ce23f5a4db19582), which was first released in 2.6.27. There are no known codesites which trigger this bug in 2.6.27 or 2.6.28. The post-2.6.28 firewire changes are the only known triggerer. There might of course be not-yet-discovered triggerers in 2.6.27 and 2.6.28, and there might be out-of-tree triggerers which are added to those kernel versions. I'll let the -stable guys decide whether they want to backport this fix. Reported-by: David Moore Cc: Stefan Richter Cc: Nadia Derbey Cc: Paul E. McKenney Cc: Manfred Spraul Cc: Kristian Hgsberg Acked-by: Pekka Enberg Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/idr.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lib/idr.c b/lib/idr.c index 6b7cfa6508f..c11c5765cde 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -121,7 +121,7 @@ int idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < IDR_FREE_MAX) { struct idr_layer *new; - new = kmem_cache_alloc(idr_layer_cache, gfp_mask); + new = kmem_cache_zalloc(idr_layer_cache, gfp_mask); if (new == NULL) return (0); move_to_free_list(idp, new); @@ -623,16 +623,10 @@ void *idr_replace(struct idr *idp, void *ptr, int id) } EXPORT_SYMBOL(idr_replace); -static void idr_cache_ctor(void *idr_layer) -{ - memset(idr_layer, 0, sizeof(struct idr_layer)); -} - void __init idr_init_cache(void) { idr_layer_cache = kmem_cache_create("idr_layer_cache", - sizeof(struct idr_layer), 0, SLAB_PANIC, - idr_cache_ctor); + sizeof(struct idr_layer), 0, SLAB_PANIC, NULL); } /** -- cgit From 9e0c79782143a816ba7d7f0f6e195091a97053f6 Mon Sep 17 00:00:00 2001 From: Eric Piel Date: Thu, 15 Jan 2009 13:51:23 -0800 Subject: lis3lv02d: merge with leds hp disk Move the second 
part of the HP laptop disk protection functionality (a red led) to the same driver. From a purely Linux developer's point of view, the led and the accelerometer have nothing related. However, they correspond to the same ACPI functionality, and so will always be used together, moreover as they share the same ACPI PNP alias, there is no other simple to allow to have same loaded at the same time if they are not in the same module. Also make it requires the led class to compile and update the Kconfig text. Signed-off-by: Pavel Machek Signed-off-by: Eric Piel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/Kconfig | 14 ++++- drivers/hwmon/hp_accel.c | 43 +++++++++++++- drivers/leds/Kconfig | 7 --- drivers/leds/Makefile | 1 - drivers/leds/leds-hp-disk.c | 137 -------------------------------------------- 5 files changed, 52 insertions(+), 150 deletions(-) delete mode 100644 drivers/leds/leds-hp-disk.c diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 4b33bc82cc2..54b43bea5e4 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -861,6 +861,8 @@ config SENSORS_HDAPS config SENSORS_LIS3LV02D tristate "STMicroeletronics LIS3LV02Dx three-axis digital accelerometer" depends on ACPI && INPUT + select NEW_LEDS + select LEDS_CLASS default n help This driver provides support for the LIS3LV02Dx accelerometer. In @@ -872,10 +874,16 @@ config SENSORS_LIS3LV02D /sys/devices/platform/lis3lv02d. This driver also provides an absolute input class device, allowing - the laptop to act as a pinball machine-esque joystick. + the laptop to act as a pinball machine-esque joystick. On HP laptops, + if the led infrastructure is activated, support for a led indicating + disk protection will be provided as hp:red:hddprotection. - This driver can also be built as a module. If so, the module - will be called lis3lv02d. + This driver can also be built as modules. 
If so, the core module + will be called lis3lv02d and a specific module for HP laptops will be + called hp_accel. + + Say Y here if you have an applicable laptop and want to experience + the awesome power of lis3lv02d. config SENSORS_APPLESMC tristate "Apple SMC (Motion sensor, light sensor, keyboard backlight)" diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index bf8d4058057..86a0f51d99d 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include "lis3lv02d.h" @@ -154,10 +155,34 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { */ }; +static acpi_status hpled_acpi_write(acpi_handle handle, int reg) +{ + unsigned long long ret; /* Not used when writing */ + union acpi_object in_obj[1]; + struct acpi_object_list args = { 1, in_obj }; + + in_obj[0].type = ACPI_TYPE_INTEGER; + in_obj[0].integer.value = reg; + + return acpi_evaluate_integer(handle, "ALED", &args, &ret); +} + +static void hpled_set(struct led_classdev *led_cdev, + enum led_brightness value) +{ + hpled_acpi_write(adev.device->handle, !!value); +} + +static struct led_classdev hpled_led = { + .name = "hp:red:hddprotection", + .default_trigger = "none", + .brightness_set = hpled_set, +}; static int lis3lv02d_add(struct acpi_device *device) { u8 val; + int ret; if (!device) return -EINVAL; @@ -183,7 +208,17 @@ static int lis3lv02d_add(struct acpi_device *device) adev.ac = lis3lv02d_axis_normal; } - return lis3lv02d_init_device(&adev); + ret = led_classdev_register(NULL, &hpled_led); + if (ret) + return ret; + + ret = lis3lv02d_init_device(&adev); + if (ret) { + led_classdev_unregister(&hpled_led); + return ret; + } + + return ret; } static int lis3lv02d_remove(struct acpi_device *device, int type) @@ -194,6 +229,8 @@ static int lis3lv02d_remove(struct acpi_device *device, int type) lis3lv02d_joystick_disable(); lis3lv02d_poweroff(device->handle); + led_classdev_unregister(&hpled_led); + return 
lis3lv02d_remove_fs(); } @@ -203,6 +240,7 @@ static int lis3lv02d_suspend(struct acpi_device *device, pm_message_t state) { /* make sure the device is off when we suspend */ lis3lv02d_poweroff(device->handle); + led_classdev_suspend(&hpled_led); return 0; } @@ -215,6 +253,7 @@ static int lis3lv02d_resume(struct acpi_device *device) else lis3lv02d_poweroff(device->handle); mutex_unlock(&adev.lock); + led_classdev_resume(&hpled_led); return 0; } #else @@ -256,7 +295,7 @@ static void __exit lis3lv02d_exit_module(void) acpi_bus_unregister_driver(&lis3lv02d_driver); } -MODULE_DESCRIPTION("Glue between LIS3LV02Dx and HP ACPI BIOS"); +MODULE_DESCRIPTION("Glue between LIS3LV02Dx and HP ACPI BIOS and support for disk protection LED."); MODULE_AUTHOR("Yan Burman, Eric Piel, Pavel Machek"); MODULE_LICENSE("GPL"); diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig index a4a1ae21463..742713611bc 100644 --- a/drivers/leds/Kconfig +++ b/drivers/leds/Kconfig @@ -119,13 +119,6 @@ config LEDS_GPIO outputs. To be useful the particular board must have LEDs and they must be connected to the GPIO lines. -config LEDS_HP_DISK - tristate "LED Support for disk protection LED on HP notebooks" - depends on LEDS_CLASS && ACPI - help - This option enable support for disk protection LED, found on - newer HP notebooks. 
- config LEDS_CLEVO_MAIL tristate "Mail LED on Clevo notebook (EXPERIMENTAL)" depends on LEDS_CLASS && X86 && SERIO_I8042 && DMI && EXPERIMENTAL diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile index bc247cb02e8..9d76f0f160a 100644 --- a/drivers/leds/Makefile +++ b/drivers/leds/Makefile @@ -23,7 +23,6 @@ obj-$(CONFIG_LEDS_HP6XX) += leds-hp6xx.o obj-$(CONFIG_LEDS_FSG) += leds-fsg.o obj-$(CONFIG_LEDS_PCA955X) += leds-pca955x.o obj-$(CONFIG_LEDS_DA903X) += leds-da903x.o -obj-$(CONFIG_LEDS_HP_DISK) += leds-hp-disk.o obj-$(CONFIG_LEDS_WM8350) += leds-wm8350.o # LED Triggers diff --git a/drivers/leds/leds-hp-disk.c b/drivers/leds/leds-hp-disk.c deleted file mode 100644 index d786adc8c5e..00000000000 --- a/drivers/leds/leds-hp-disk.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * leds-hp-disk.c - driver for HP "hard disk protection" LED - * - * Copyright (C) 2008 Pavel Machek - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRIVER_NAME "leds-hp-disk" -#define ACPI_MDPS_CLASS "led" - -/* For automatic insertion of the module */ -static struct acpi_device_id hpled_device_ids[] = { - {"HPQ0004", 0}, /* HP Mobile Data Protection System PNP */ - {"", 0}, -}; -MODULE_DEVICE_TABLE(acpi, hpled_device_ids); - -struct acpi_hpled { - struct acpi_device *device; /* The ACPI device */ -}; - -static struct acpi_hpled adev; - -static acpi_status hpled_acpi_write(acpi_handle handle, int reg) -{ - unsigned long long ret; /* Not used when writing */ - union acpi_object in_obj[1]; - struct acpi_object_list args = { 1, in_obj }; - - in_obj[0].type = ACPI_TYPE_INTEGER; - in_obj[0].integer.value = reg; - - return acpi_evaluate_integer(handle, "ALED", &args, &ret); -} - -static void hpled_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - hpled_acpi_write(adev.device->handle, !!value); -} - -static struct led_classdev hpled_led = { - .name = "hp:red:hddprotection", - .default_trigger = "heartbeat", - .brightness_set = hpled_set, - .flags = LED_CORE_SUSPENDRESUME, -}; - -static int hpled_add(struct acpi_device *device) -{ - int ret; - - if (!device) - return -EINVAL; - - adev.device = device; - strcpy(acpi_device_name(device), DRIVER_NAME); - strcpy(acpi_device_class(device), ACPI_MDPS_CLASS); - device->driver_data = &adev; - - ret = led_classdev_register(NULL, &hpled_led); - return ret; -} - -static int hpled_remove(struct acpi_device *device, int type) -{ - if (!device) - return -EINVAL; - - led_classdev_unregister(&hpled_led); - return 0; -} - - - -static struct acpi_driver leds_hp_driver = { - .name = DRIVER_NAME, - .class = ACPI_MDPS_CLASS, - .ids = 
hpled_device_ids, - .ops = { - .add = hpled_add, - .remove = hpled_remove, - } -}; - -static int __init hpled_init_module(void) -{ - int ret; - - if (acpi_disabled) - return -ENODEV; - - ret = acpi_bus_register_driver(&leds_hp_driver); - if (ret < 0) - return ret; - - printk(KERN_INFO DRIVER_NAME " driver loaded.\n"); - - return 0; -} - -static void __exit hpled_exit_module(void) -{ - acpi_bus_unregister_driver(&leds_hp_driver); -} - -MODULE_DESCRIPTION("Driver for HP disk protection LED"); -MODULE_AUTHOR("Pavel Machek "); -MODULE_LICENSE("GPL"); - -module_init(hpled_init_module); -module_exit(hpled_exit_module); -- cgit From 219beb291ba9275dd676578724103abed4cfbfe3 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 15 Jan 2009 13:51:24 -0800 Subject: lis3: fix documentation to fit into 80 columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix lis3 documentation to fit into 80 columns. Signed-off-by: Pavel Machek Cc: Éric Piel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/lis3lv02d | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/Documentation/hwmon/lis3lv02d b/Documentation/hwmon/lis3lv02d index 65dfb0c0fd6..0fcfc4a7ccd 100644 --- a/Documentation/hwmon/lis3lv02d +++ b/Documentation/hwmon/lis3lv02d @@ -13,18 +13,21 @@ Author: Description ----------- -This driver provides support for the accelerometer found in various HP laptops -sporting the feature officially called "HP Mobile Data Protection System 3D" or -"HP 3D DriveGuard". It detect automatically laptops with this sensor. Known models -(for now the HP 2133, nc6420, nc2510, nc8510, nc84x0, nw9440 and nx9420) will -have their axis automatically oriented on standard way (eg: you can directly -play neverball). 
The accelerometer data is readable via +This driver provides support for the accelerometer found in various HP +laptops sporting the feature officially called "HP Mobile Data +Protection System 3D" or "HP 3D DriveGuard". It detect automatically +laptops with this sensor. Known models (for now the HP 2133, nc6420, +nc2510, nc8510, nc84x0, nw9440 and nx9420) will have their axis +automatically oriented on standard way (eg: you can directly play +neverball). The accelerometer data is readable via /sys/devices/platform/lis3lv02d. Sysfs attributes under /sys/devices/platform/lis3lv02d/: position - 3D position that the accelerometer reports. Format: "(x,y,z)" -calibrate - read: values (x, y, z) that are used as the base for input class device operation. - write: forces the base to be recalibrated with the current position. +calibrate - read: values (x, y, z) that are used as the base for input + class device operation. + write: forces the base to be recalibrated with the current + position. rate - reports the sampling rate of the accelerometer device in HZ This driver also provides an absolute input class device, allowing @@ -39,11 +42,12 @@ the accelerometer are converted into a "standard" organisation of the axes * When the laptop is horizontal the position reported is about 0 for X and Y and a positive value for Z * If the left side is elevated, X increases (becomes positive) - * If the front side (where the touchpad is) is elevated, Y decreases (becomes negative) + * If the front side (where the touchpad is) is elevated, Y decreases + (becomes negative) * If the laptop is put upside-down, Z becomes negative -If your laptop model is not recognized (cf "dmesg"), you can send an email to the -authors to add it to the database. When reporting a new laptop, please include -the output of "dmidecode" plus the value of /sys/devices/platform/lis3lv02d/position -in these four cases. 
+If your laptop model is not recognized (cf "dmesg"), you can send an +email to the authors to add it to the database. When reporting a new +laptop, please include the output of "dmidecode" plus the value of +/sys/devices/platform/lis3lv02d/position in these four cases. -- cgit From 9e1c9d865543593ee92ec3a5075f064dec981a96 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Thu, 15 Jan 2009 13:51:24 -0800 Subject: hp_accel: do not call ACPI from invalid context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The LED on HP notebooks is connected through ACPI. That unfortunately means that it needs to be delayed by using schedule_work() to avoid calling the ACPI interpreter from an invalid context. [akpm@linux-foundation.org: use flush_work() rather than sort-of reimplementing it] Signed-off-by: Pavel Machek Cc: Éric Piel Cc: Len Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/hwmon/hp_accel.c | 68 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/drivers/hwmon/hp_accel.c b/drivers/hwmon/hp_accel.c index 86a0f51d99d..03705240000 100644 --- a/drivers/hwmon/hp_accel.c +++ b/drivers/hwmon/hp_accel.c @@ -3,7 +3,7 @@ * * Copyright (C) 2007-2008 Yan Burman * Copyright (C) 2008 Eric Piel - * Copyright (C) 2008 Pavel Machek + * Copyright (C) 2008-2009 Pavel Machek * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -44,6 +44,36 @@ #define DRIVER_NAME "lis3lv02d" #define ACPI_MDPS_CLASS "accelerometer" +/* Delayed LEDs infrastructure ------------------------------------ */ + +/* Special LED class that can defer work */ +struct delayed_led_classdev { + struct led_classdev led_classdev; + struct work_struct work; + enum led_brightness new_brightness; + + unsigned int led; /* For driver */ + void (*set_brightness)(struct delayed_led_classdev *data, enum 
led_brightness value); +}; + +static inline void delayed_set_status_worker(struct work_struct *work) +{ + struct delayed_led_classdev *data = + container_of(work, struct delayed_led_classdev, work); + + data->set_brightness(data, data->new_brightness); +} + +static inline void delayed_sysfs_set(struct led_classdev *led_cdev, + enum led_brightness brightness) +{ + struct delayed_led_classdev *data = container_of(led_cdev, + struct delayed_led_classdev, led_classdev); + data->new_brightness = brightness; + schedule_work(&data->work); +} + +/* HP-specific accelerometer driver ------------------------------------ */ /* For automatic insertion of the module */ static struct acpi_device_id lis3lv02d_device_ids[] = { @@ -155,28 +185,27 @@ static struct dmi_system_id lis3lv02d_dmi_ids[] = { */ }; -static acpi_status hpled_acpi_write(acpi_handle handle, int reg) +static void hpled_set(struct delayed_led_classdev *led_cdev, enum led_brightness value) { + acpi_handle handle = adev.device->handle; unsigned long long ret; /* Not used when writing */ union acpi_object in_obj[1]; struct acpi_object_list args = { 1, in_obj }; in_obj[0].type = ACPI_TYPE_INTEGER; - in_obj[0].integer.value = reg; + in_obj[0].integer.value = !!value; - return acpi_evaluate_integer(handle, "ALED", &args, &ret); -} - -static void hpled_set(struct led_classdev *led_cdev, - enum led_brightness value) -{ - hpled_acpi_write(adev.device->handle, !!value); + acpi_evaluate_integer(handle, "ALED", &args, &ret); } -static struct led_classdev hpled_led = { - .name = "hp:red:hddprotection", - .default_trigger = "none", - .brightness_set = hpled_set, +static struct delayed_led_classdev hpled_led = { + .led_classdev = { + .name = "hp::hddprotect", + .default_trigger = "none", + .brightness_set = delayed_sysfs_set, + .flags = LED_CORE_SUSPENDRESUME, + }, + .set_brightness = hpled_set, }; static int lis3lv02d_add(struct acpi_device *device) @@ -208,13 +237,15 @@ static int lis3lv02d_add(struct acpi_device *device) 
adev.ac = lis3lv02d_axis_normal; } - ret = led_classdev_register(NULL, &hpled_led); + INIT_WORK(&hpled_led.work, delayed_set_status_worker); + ret = led_classdev_register(NULL, &hpled_led.led_classdev); if (ret) return ret; ret = lis3lv02d_init_device(&adev); if (ret) { - led_classdev_unregister(&hpled_led); + flush_work(&hpled_led.work); + led_classdev_unregister(&hpled_led.led_classdev); return ret; } @@ -229,7 +260,8 @@ static int lis3lv02d_remove(struct acpi_device *device, int type) lis3lv02d_joystick_disable(); lis3lv02d_poweroff(device->handle); - led_classdev_unregister(&hpled_led); + flush_work(&hpled_led.work); + led_classdev_unregister(&hpled_led.led_classdev); return lis3lv02d_remove_fs(); } @@ -240,7 +272,6 @@ static int lis3lv02d_suspend(struct acpi_device *device, pm_message_t state) { /* make sure the device is off when we suspend */ lis3lv02d_poweroff(device->handle); - led_classdev_suspend(&hpled_led); return 0; } @@ -253,7 +284,6 @@ static int lis3lv02d_resume(struct acpi_device *device) else lis3lv02d_poweroff(device->handle); mutex_unlock(&adev.lock); - led_classdev_resume(&hpled_led); return 0; } #else -- cgit From 0eb253e223c88b982461e59154fcad1b82597592 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:51:25 -0800 Subject: memcg: fix section mismatch At system boot when creating the top cgroup, mem_cgroup_create() calls enable_swap_cgroup() which is marked as __init, so mark mem_cgroup_create() as __ref to avoid false section mismatch warning. 
Reported-by: Rakib Mullick Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fb62b4335fa..f0dc076adf0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2202,7 +2202,7 @@ static void __init enable_swap_cgroup(void) } #endif -static struct cgroup_subsys_state * +static struct cgroup_subsys_state * __ref mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { struct mem_cgroup *mem, *parent; -- cgit From 068b38c1fa7a9210608f27ac521897ccc5f9b726 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 15 Jan 2009 13:51:26 -0800 Subject: memcg: fix a race when setting memory.swappiness (suppose: memcg->use_hierarchy == 0 and memcg->swappiness == 60) echo 10 > /memcg/0/swappiness | mem_cgroup_swappiness_write() | ... | echo 1 > /memcg/0/use_hierarchy | mkdir /mnt/0/1 | sub_memcg->swappiness = 60; memcg->swappiness = 10; | In the above scenario, we end up having 2 different swappiness values in a single hierarchy. We should hold cgroup_lock() when checking cgrp->children list.
Signed-off-by: Li Zefan Acked-by: KAMEZAWA Hiroyuki Cc: Balbir Singh Cc: Paul Menage Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f0dc076adf0..4d0ea3ceba6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1992,6 +1992,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, { struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp); struct mem_cgroup *parent; + if (val > 100) return -EINVAL; @@ -1999,15 +2000,22 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, return -EINVAL; parent = mem_cgroup_from_cont(cgrp->parent); + + cgroup_lock(); + /* If under hierarchy, only empty-root can set this value */ if ((parent->use_hierarchy) || - (memcg->use_hierarchy && !list_empty(&cgrp->children))) + (memcg->use_hierarchy && !list_empty(&cgrp->children))) { + cgroup_unlock(); return -EINVAL; + } spin_lock(&memcg->reclaim_param_lock); memcg->swappiness = val; spin_unlock(&memcg->reclaim_param_lock); + cgroup_unlock(); + return 0; } -- cgit From 00bfddaf7f68a6551319b536f052040c370756b0 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Thu, 15 Jan 2009 13:51:26 -0800 Subject: include of is preferred over Impact: fix 15 make headers_check warnings: include of is preferred over Signed-off-by: Jaswinder Singh Rajput Cc: Ingo Molnar Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/agpgart.h | 1 - include/linux/atm_idt77105.h | 2 +- include/linux/capi.h | 2 +- include/linux/connector.h | 2 +- include/linux/cyclades.h | 2 -- include/linux/fb.h | 2 +- include/linux/if_pppol2tp.h | 2 +- include/linux/if_pppox.h | 2 +- include/linux/input.h | 2 +- include/linux/joystick.h | 2 +- include/linux/kvm.h | 2 +- include/linux/loop.h | 2 +- include/linux/matroxfb.h | 2 +- include/linux/phantom.h | 2 +- 
include/linux/radeonfb.h | 2 +- 15 files changed, 13 insertions(+), 16 deletions(-) diff --git a/include/linux/agpgart.h b/include/linux/agpgart.h index c8fdb6e658e..110c600c885 100644 --- a/include/linux/agpgart.h +++ b/include/linux/agpgart.h @@ -52,7 +52,6 @@ #ifndef __KERNEL__ #include -#include struct agp_version { __u16 major; diff --git a/include/linux/atm_idt77105.h b/include/linux/atm_idt77105.h index 05621cf2070..8b724000aa5 100644 --- a/include/linux/atm_idt77105.h +++ b/include/linux/atm_idt77105.h @@ -7,7 +7,7 @@ #ifndef LINUX_ATM_IDT77105_H #define LINUX_ATM_IDT77105_H -#include +#include #include #include diff --git a/include/linux/capi.h b/include/linux/capi.h index fdebaaa9f66..65100d6cb89 100644 --- a/include/linux/capi.h +++ b/include/linux/capi.h @@ -12,7 +12,7 @@ #ifndef __LINUX_CAPI_H__ #define __LINUX_CAPI_H__ -#include +#include #include #ifndef __KERNEL__ #include diff --git a/include/linux/connector.h b/include/linux/connector.h index 5c7f9468f75..34f2789d9b9 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -22,7 +22,7 @@ #ifndef __CONNECTOR_H #define __CONNECTOR_H -#include +#include #define CN_IDX_CONNECTOR 0xffffffff #define CN_VAL_CONNECTOR 0xffffffff diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 2d3d1e04ba9..d06fbf28634 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -150,8 +150,6 @@ struct CYZ_BOOT_CTRL { * architectures and compilers. 
*/ -#include - typedef __u64 ucdouble; /* 64 bits, unsigned */ typedef __u32 uclong; /* 32 bits, unsigned */ typedef __u16 ucshort; /* 16 bits, unsigned */ diff --git a/include/linux/fb.h b/include/linux/fb.h index 1ee63df5be9..818fe21257e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -1,7 +1,7 @@ #ifndef _LINUX_FB_H #define _LINUX_FB_H -#include +#include #include struct dentry; diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h index a7d6a2234b3..c7a66882b6d 100644 --- a/include/linux/if_pppol2tp.h +++ b/include/linux/if_pppol2tp.h @@ -15,7 +15,7 @@ #ifndef __LINUX_IF_PPPOL2TP_H #define __LINUX_IF_PPPOL2TP_H -#include +#include #ifdef __KERNEL__ #include diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 6fb7f178857..30c88b2245f 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -17,7 +17,7 @@ #define __LINUX_IF_PPPOX_H -#include +#include #include #ifdef __KERNEL__ diff --git a/include/linux/input.h b/include/linux/input.h index 9a6355f74db..1249a0c20a3 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #endif /* diff --git a/include/linux/joystick.h b/include/linux/joystick.h index b5e051295a6..9e20c29c1e1 100644 --- a/include/linux/joystick.h +++ b/include/linux/joystick.h @@ -27,7 +27,7 @@ * Vojtech Pavlik, Ucitelska 1576, Prague 8, 182 00 Czech Republic */ -#include +#include #include /* diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 35525ac6333..5715f190760 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -7,7 +7,7 @@ * Note: you must update KVM_API_VERSION if you change this interface. 
*/ -#include +#include #include #include #include diff --git a/include/linux/loop.h b/include/linux/loop.h index 46169a7b559..6ffd6db5bb0 100644 --- a/include/linux/loop.h +++ b/include/linux/loop.h @@ -80,7 +80,7 @@ enum { }; #include /* for __kernel_old_dev_t */ -#include /* for __u64 */ +#include /* for __u64 */ /* Backwards compatibility version */ struct loop_info { diff --git a/include/linux/matroxfb.h b/include/linux/matroxfb.h index ae5b0949306..404f678e734 100644 --- a/include/linux/matroxfb.h +++ b/include/linux/matroxfb.h @@ -2,7 +2,7 @@ #define __LINUX_MATROXFB_H__ #include -#include +#include #include struct matroxioc_output_mode { diff --git a/include/linux/phantom.h b/include/linux/phantom.h index 02268c54c25..94dd6645c60 100644 --- a/include/linux/phantom.h +++ b/include/linux/phantom.h @@ -10,7 +10,7 @@ #ifndef __PHANTOM_H #define __PHANTOM_H -#include +#include /* PHN_(G/S)ET_REG param */ struct phm_reg { diff --git a/include/linux/radeonfb.h b/include/linux/radeonfb.h index 5bd8975ed78..8c4bbdecc44 100644 --- a/include/linux/radeonfb.h +++ b/include/linux/radeonfb.h @@ -2,7 +2,7 @@ #define __LINUX_RADEONFB_H__ #include -#include +#include #define ATY_RADEON_LCD_ON 0x00000001 #define ATY_RADEON_CRT_ON 0x00000002 -- cgit From 634a84f8d5b74da497688d3346f6809c28239eda Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 15 Jan 2009 13:51:28 -0800 Subject: drivers/usb/core/hub.c: fix CONFIG_USB_OTG=y build Carry out the PM-routine interface change in the USB OTG pathway. This was omitted from the earlier interface-change patch by mistake. 
Signed-off-by: Alan Stern Cc: Greg KH Cc: Russell King Cc: Felipe Balbi Cc: Tony Lindgren Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/usb/core/hub.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index d5d0e40b1e2..94d5ee263c2 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1554,7 +1554,7 @@ static int usb_configure_device_otg(struct usb_device *udev) * (Includes HNP test device.) */ if (udev->bus->b_hnp_enable || udev->bus->is_b_host) { - err = usb_port_suspend(udev); + err = usb_port_suspend(udev, PMSG_SUSPEND); if (err < 0) dev_dbg(&udev->dev, "HNP fail, %d\n", err); } -- cgit From 6b7021ef7e1a703c7092daeceda063951b22b4f6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 15 Jan 2009 13:51:29 -0800 Subject: ext2: also update the inode on disk when dir is IS_DIRSYNC We used to just write changed page for IS_DIRSYNC inodes. But we also have to update the directory inode itself just for the case that we've allocated a new block and changed i_size. 
[akpm@linux-foundation.org: still sync the data page] Signed-off-by: Jan Kara Tested-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/dir.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 9a0fc400f91..2999d72153b 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -95,10 +95,13 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) mark_inode_dirty(dir); } - if (IS_DIRSYNC(dir)) + if (IS_DIRSYNC(dir)) { err = write_one_page(page, 1); - else + if (!err) + err = ext2_sync_inode(dir); + } else { unlock_page(page); + } return err; } -- cgit From 6364853dabe78dda7ffdfb8803c1e56c0fff2e43 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 15 Jan 2009 13:51:29 -0800 Subject: 9p: disallow RDMA if RDMA CM isn't available If INET=y and INFINIBAND=y, but IPV6=m then INFINIBAND_ADDR_TRANS is set to n and the RDMA CM functions rdma_connect() et al are not built. However, the current config dependencies allow NET_9P_RDMA to be selected in this configuration, which leads to a build failure. Fix this by adding a dependency on INFINIBAND_ADDR_TRANS to disallow NET_9P_RDMA in this case. Reported-by: Randy Dunlap Signed-off-by: Roland Dreier Acked-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/9p/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 0663f99e977..7ed75c7bd5d 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. 
-- cgit From 775a42ecf8b8a86b55173da27e6cc874af5b944d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Jan 2009 14:59:00 +0000 Subject: powerpc: Cleanup from l64 to ll64 change drivers/scsi This is a powerpc specific driver. Signed-off-by: Stephen Rothwell Signed-off-by: Benjamin Herrenschmidt --- drivers/scsi/ibmvscsi/ibmvfc.c | 14 +++++++------- drivers/scsi/ibmvscsi/ibmvfc.h | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index ee0739b217b..91ef669d98f 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -933,7 +933,7 @@ static void ibmvfc_get_host_speed(struct Scsi_Host *shost) fc_host_speed(shost) = FC_PORTSPEED_16GBIT; break; default: - ibmvfc_log(vhost, 3, "Unknown port speed: %ld Gbit\n", + ibmvfc_log(vhost, 3, "Unknown port speed: %lld Gbit\n", vhost->login_buf->resp.link_speed / 100); fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN; break; @@ -2149,8 +2149,8 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq, { const char *desc = ibmvfc_get_ae_desc(crq->event); - ibmvfc_log(vhost, 3, "%s event received. scsi_id: %lx, wwpn: %lx," - " node_name: %lx\n", desc, crq->scsi_id, crq->wwpn, crq->node_name); + ibmvfc_log(vhost, 3, "%s event received. 
scsi_id: %llx, wwpn: %llx," + " node_name: %llx\n", desc, crq->scsi_id, crq->wwpn, crq->node_name); switch (crq->event) { case IBMVFC_AE_LINK_UP: @@ -2184,7 +2184,7 @@ static void ibmvfc_handle_async(struct ibmvfc_async_crq *crq, ibmvfc_link_down(vhost, IBMVFC_HALTED); break; default: - dev_err(vhost->dev, "Unknown async event received: %ld\n", crq->event); + dev_err(vhost->dev, "Unknown async event received: %lld\n", crq->event); break; }; } @@ -2261,13 +2261,13 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost) * actually sent */ if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) { - dev_err(vhost->dev, "Returned correlation_token 0x%08lx is invalid!\n", + dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n", crq->ioba); return; } if (unlikely(atomic_read(&evt->free))) { - dev_err(vhost->dev, "Received duplicate correlation_token 0x%08lx!\n", + dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n", crq->ioba); return; } @@ -3259,7 +3259,7 @@ static int ibmvfc_alloc_target(struct ibmvfc_host *vhost, u64 scsi_id) tgt = mempool_alloc(vhost->tgt_pool, GFP_KERNEL); if (!tgt) { - dev_err(vhost->dev, "Target allocation failure for scsi id %08lx\n", + dev_err(vhost->dev, "Target allocation failure for scsi id %08llx\n", scsi_id); return -ENOMEM; } diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index babdf3db59d..87dafd0f8d4 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -691,13 +691,13 @@ struct ibmvfc_host { #define DBG_CMD(CMD) do { if (ibmvfc_debug) CMD; } while (0) #define tgt_dbg(t, fmt, ...) \ - DBG_CMD(dev_info((t)->vhost->dev, "%lX: " fmt, (t)->scsi_id, ##__VA_ARGS__)) + DBG_CMD(dev_info((t)->vhost->dev, "%llX: " fmt, (t)->scsi_id, ##__VA_ARGS__)) #define tgt_info(t, fmt, ...) 
\ - dev_info((t)->vhost->dev, "%lX: " fmt, (t)->scsi_id, ##__VA_ARGS__) + dev_info((t)->vhost->dev, "%llX: " fmt, (t)->scsi_id, ##__VA_ARGS__) #define tgt_err(t, fmt, ...) \ - dev_err((t)->vhost->dev, "%lX: " fmt, (t)->scsi_id, ##__VA_ARGS__) + dev_err((t)->vhost->dev, "%llX: " fmt, (t)->scsi_id, ##__VA_ARGS__) #define ibmvfc_dbg(vhost, ...) \ DBG_CMD(dev_info((vhost)->dev, ##__VA_ARGS__)) -- cgit From c52fe6b620e9c7a52b296ec478bd24b91b4e7634 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 19:54:50 +0000 Subject: powerpc/ps3: set_dabr() takes an unsigned long Also silences this warning: arch/powerpc/platforms/ps3/setup.c:275: warning: initialization from incompatible pointer type Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/setup.c b/arch/powerpc/platforms/ps3/setup.c index 35f3e85cf60..3331ccbb8d3 100644 --- a/arch/powerpc/platforms/ps3/setup.c +++ b/arch/powerpc/platforms/ps3/setup.c @@ -186,7 +186,7 @@ early_param("ps3flash", early_parse_ps3flash); #define prealloc_ps3flash_bounce_buffer() do { } while (0) #endif -static int ps3_set_dabr(u64 dabr) +static int ps3_set_dabr(unsigned long dabr) { enum {DABR_USER = 1, DABR_KERNEL = 2,}; -- cgit From 494fd07a88ea561e1bea73516d7e92c4c2d1f223 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 19:58:10 +0000 Subject: powerpc/ps3: Use dma_addr_t down through the stack Push the dma_addr_t type usage all the way down to where the actual values are manipulated. 
Now that u64 is "unsigned long long", this removes warnings like: arch/powerpc/platforms/ps3/system-bus.c:532: warning: passing argument 4 of 'ps3_dma_map' from incompatible pointer type arch/powerpc/platforms/ps3/system-bus.c:649: warning: passing argument 4 of 'ps3_dma_map' from incompatible pointer type Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/ps3.h | 8 ++++---- arch/powerpc/platforms/ps3/mm.c | 32 +++++++++++++++++--------------- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index eead5c67197..67f1812698d 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -103,10 +103,10 @@ struct ps3_dma_region_ops { int (*map)(struct ps3_dma_region *, unsigned long virt_addr, unsigned long len, - unsigned long *bus_addr, + dma_addr_t *bus_addr, u64 iopte_pp); int (*unmap)(struct ps3_dma_region *, - unsigned long bus_addr, + dma_addr_t bus_addr, unsigned long len); }; /** @@ -124,9 +124,9 @@ int ps3_dma_region_init(struct ps3_system_bus_device *dev, int ps3_dma_region_create(struct ps3_dma_region *r); int ps3_dma_region_free(struct ps3_dma_region *r); int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr, - unsigned long len, unsigned long *bus_addr, + unsigned long len, dma_addr_t *bus_addr, u64 iopte_pp); -int ps3_dma_unmap(struct ps3_dma_region *r, unsigned long bus_addr, +int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr, unsigned long len); /* mmio routines */ diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index a4d49dd9e8a..4761e2dcf3e 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -777,7 +777,7 @@ static int dma_ioc0_region_free(struct ps3_dma_region *r) */ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, 
- unsigned long len, unsigned long *bus_addr, + unsigned long len, dma_addr_t *bus_addr, u64 iopte_flag) { int result; @@ -800,7 +800,7 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, DBG("%s:%d lpar_addr %lxh\n", __func__, __LINE__, lpar_addr); DBG("%s:%d len %lxh\n", __func__, __LINE__, len); - DBG("%s:%d bus_addr %lxh (%lxh)\n", __func__, __LINE__, + DBG("%s:%d bus_addr %llxh (%lxh)\n", __func__, __LINE__, *bus_addr, len); } @@ -832,7 +832,7 @@ static int dma_sb_map_area(struct ps3_dma_region *r, unsigned long virt_addr, } static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, - unsigned long len, unsigned long *bus_addr, + unsigned long len, dma_addr_t *bus_addr, u64 iopte_flag) { int result; @@ -872,7 +872,7 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, return result; } *bus_addr = c->bus_addr + phys_addr - aligned_phys; - DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#lx\n", __func__, + DBG("%s: va=%#lx pa=%#lx a_pa=%#lx bus=%#llx\n", __func__, virt_addr, phys_addr, aligned_phys, *bus_addr); c->usage_count = 1; @@ -889,7 +889,7 @@ static int dma_ioc0_map_area(struct ps3_dma_region *r, unsigned long virt_addr, * This is the common dma unmap routine. 
*/ -static int dma_sb_unmap_area(struct ps3_dma_region *r, unsigned long bus_addr, +static int dma_sb_unmap_area(struct ps3_dma_region *r, dma_addr_t bus_addr, unsigned long len) { unsigned long flags; @@ -903,7 +903,7 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, unsigned long bus_addr, 1 << r->page_size); unsigned long aligned_len = _ALIGN_UP(len + bus_addr - aligned_bus, 1 << r->page_size); - DBG("%s:%d: not found: bus_addr %lxh\n", + DBG("%s:%d: not found: bus_addr %llxh\n", __func__, __LINE__, bus_addr); DBG("%s:%d: not found: len %lxh\n", __func__, __LINE__, len); @@ -926,12 +926,12 @@ static int dma_sb_unmap_area(struct ps3_dma_region *r, unsigned long bus_addr, } static int dma_ioc0_unmap_area(struct ps3_dma_region *r, - unsigned long bus_addr, unsigned long len) + dma_addr_t bus_addr, unsigned long len) { unsigned long flags; struct dma_chunk *c; - DBG("%s: start a=%#lx l=%#lx\n", __func__, bus_addr, len); + DBG("%s: start a=%#llx l=%#lx\n", __func__, bus_addr, len); spin_lock_irqsave(&r->chunk_list.lock, flags); c = dma_find_chunk(r, bus_addr, len); @@ -941,7 +941,7 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r, unsigned long aligned_len = _ALIGN_UP(len + bus_addr - aligned_bus, 1 << r->page_size); - DBG("%s:%d: not found: bus_addr %lxh\n", + DBG("%s:%d: not found: bus_addr %llxh\n", __func__, __LINE__, bus_addr); DBG("%s:%d: not found: len %lxh\n", __func__, __LINE__, len); @@ -975,7 +975,8 @@ static int dma_ioc0_unmap_area(struct ps3_dma_region *r, static int dma_sb_region_create_linear(struct ps3_dma_region *r) { int result; - unsigned long virt_addr, len, tmp; + unsigned long virt_addr, len; + dma_addr_t tmp; if (r->len > 16*1024*1024) { /* FIXME: need proper fix */ /* force 16M dma pages for linear mapping */ @@ -1027,7 +1028,8 @@ static int dma_sb_region_create_linear(struct ps3_dma_region *r) static int dma_sb_region_free_linear(struct ps3_dma_region *r) { int result; - unsigned long bus_addr, len, lpar_addr; + dma_addr_t 
bus_addr; + unsigned long len, lpar_addr; if (r->offset < map.rm.size) { /* Unmap (part of) 1st RAM chunk */ @@ -1072,7 +1074,7 @@ static int dma_sb_region_free_linear(struct ps3_dma_region *r) */ static int dma_sb_map_area_linear(struct ps3_dma_region *r, - unsigned long virt_addr, unsigned long len, unsigned long *bus_addr, + unsigned long virt_addr, unsigned long len, dma_addr_t *bus_addr, u64 iopte_flag) { unsigned long phys_addr = is_kernel_addr(virt_addr) ? __pa(virt_addr) @@ -1091,7 +1093,7 @@ static int dma_sb_map_area_linear(struct ps3_dma_region *r, */ static int dma_sb_unmap_area_linear(struct ps3_dma_region *r, - unsigned long bus_addr, unsigned long len) + dma_addr_t bus_addr, unsigned long len) { return 0; }; @@ -1169,13 +1171,13 @@ int ps3_dma_region_free(struct ps3_dma_region *r) EXPORT_SYMBOL(ps3_dma_region_free); int ps3_dma_map(struct ps3_dma_region *r, unsigned long virt_addr, - unsigned long len, unsigned long *bus_addr, + unsigned long len, dma_addr_t *bus_addr, u64 iopte_flag) { return r->region_ops->map(r, virt_addr, len, bus_addr, iopte_flag); } -int ps3_dma_unmap(struct ps3_dma_region *r, unsigned long bus_addr, +int ps3_dma_unmap(struct ps3_dma_region *r, dma_addr_t bus_addr, unsigned long len) { return r->region_ops->unmap(r, bus_addr, len); diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index ee0d2291162..70ef793e556 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -568,7 +568,7 @@ static dma_addr_t ps3_sb_map_page(struct device *_dev, struct page *page, { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); int result; - unsigned long bus_addr; + dma_addr_t bus_addr; void *ptr = page_address(page) + offset; result = ps3_dma_map(dev->d_region, (unsigned long)ptr, size, @@ -590,7 +590,7 @@ static dma_addr_t ps3_ioc0_map_page(struct device *_dev, struct page *page, { struct ps3_system_bus_device *dev = 
ps3_dev_to_system_bus_dev(_dev); int result; - unsigned long bus_addr; + dma_addr_t bus_addr; u64 iopte_flag; void *ptr = page_address(page) + offset; -- cgit From b17b3df161814c43c03dbc8dbf8d32741bb30ba4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 19:59:41 +0000 Subject: powerpc/ps3: The lv1_ routines have u64 parameters We just fix up the reference parameters as the others are dealt with by arithmetic promotion rules and don't cause warnings. This removes warnings like this: arch/powerpc/platforms/ps3/interrupt.c:327: warning: passing argument 1 of 'lv1_construct_event_receive_port' from incompatible pointer type Also, these: drivers/ps3/ps3-vuart.c:462: warning: passing argument 4 of 'ps3_vuart_raw_read' from incompatible pointer type drivers/ps3/ps3-vuart.c:592: warning: passing argument 4 of 'ps3_vuart_raw_read' from incompatible pointer type Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/interrupt.c | 8 +++---- arch/powerpc/platforms/ps3/mm.c | 38 ++++++++++++++++++--------------- arch/powerpc/platforms/ps3/spu.c | 12 +++++++---- arch/powerpc/platforms/ps3/system-bus.c | 4 +++- drivers/ps3/ps3-vuart.c | 24 +++++++++++---------- 5 files changed, 49 insertions(+), 37 deletions(-) diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index e59634f7af9..b746792d84c 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(ps3_irq_plug_destroy); int ps3_event_receive_port_setup(enum ps3_cpu_binding cpu, unsigned int *virq) { int result; - unsigned long outlet; + u64 outlet; result = lv1_construct_event_receive_port(&outlet); @@ -468,7 +468,7 @@ int ps3_io_irq_setup(enum ps3_cpu_binding cpu, unsigned int interrupt_id, unsigned int *virq) { int result; - unsigned long outlet; + u64 outlet; result = lv1_construct_io_irq_outlet(interrupt_id, &outlet); 
@@ -525,7 +525,7 @@ int ps3_vuart_irq_setup(enum ps3_cpu_binding cpu, void* virt_addr_bmp, unsigned int *virq) { int result; - unsigned long outlet; + u64 outlet; u64 lpar_addr; BUG_ON(!is_kernel_addr((u64)virt_addr_bmp)); @@ -581,7 +581,7 @@ int ps3_spe_irq_setup(enum ps3_cpu_binding cpu, unsigned long spe_id, unsigned int class, unsigned int *virq) { int result; - unsigned long outlet; + u64 outlet; BUG_ON(class > 2); diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 4761e2dcf3e..5c8d066283e 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -79,7 +79,7 @@ enum { */ struct mem_region { - unsigned long base; + u64 base; unsigned long size; unsigned long offset; }; @@ -104,8 +104,8 @@ struct mem_region { struct map { unsigned long total; - unsigned long vas_id; - unsigned long htab_size; + u64 vas_id; + u64 htab_size; struct mem_region rm; struct mem_region r1; }; @@ -116,9 +116,9 @@ static void __maybe_unused _debug_dump_map(const struct map *m, { DBG("%s:%d: map.total = %lxh\n", func, line, m->total); DBG("%s:%d: map.rm.size = %lxh\n", func, line, m->rm.size); - DBG("%s:%d: map.vas_id = %lu\n", func, line, m->vas_id); - DBG("%s:%d: map.htab_size = %lxh\n", func, line, m->htab_size); - DBG("%s:%d: map.r1.base = %lxh\n", func, line, m->r1.base); + DBG("%s:%d: map.vas_id = %llu\n", func, line, m->vas_id); + DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size); + DBG("%s:%d: map.r1.base = %llxh\n", func, line, m->r1.base); DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset); DBG("%s:%d: map.r1.size = %lxh\n", func, line, m->r1.size); } @@ -146,11 +146,11 @@ EXPORT_SYMBOL(ps3_mm_phys_to_lpar); void __init ps3_mm_vas_create(unsigned long* htab_size) { int result; - unsigned long start_address; - unsigned long size; - unsigned long access_right; - unsigned long max_page_size; - unsigned long flags; + u64 start_address; + u64 size; + u64 access_right; + u64 max_page_size; + u64 
flags; result = lv1_query_logical_partition_address_region_info(0, &start_address, &size, &access_right, &max_page_size, @@ -164,7 +164,7 @@ void __init ps3_mm_vas_create(unsigned long* htab_size) } if (max_page_size < PAGE_SHIFT_16M) { - DBG("%s:%d: bad max_page_size %lxh\n", __func__, __LINE__, + DBG("%s:%d: bad max_page_size %llxh\n", __func__, __LINE__, max_page_size); goto fail; } @@ -208,7 +208,7 @@ void ps3_mm_vas_destroy(void) { int result; - DBG("%s:%d: map.vas_id = %lu\n", __func__, __LINE__, map.vas_id); + DBG("%s:%d: map.vas_id = %llu\n", __func__, __LINE__, map.vas_id); if (map.vas_id) { result = lv1_select_virtual_address_space(0); @@ -235,7 +235,7 @@ void ps3_mm_vas_destroy(void) static int ps3_mm_region_create(struct mem_region *r, unsigned long size) { int result; - unsigned long muid; + u64 muid; r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); @@ -277,7 +277,7 @@ static void ps3_mm_region_destroy(struct mem_region *r) { int result; - DBG("%s:%d: r->base = %lxh\n", __func__, __LINE__, r->base); + DBG("%s:%d: r->base = %llxh\n", __func__, __LINE__, r->base); if (r->base) { result = lv1_release_memory(r->base); BUG_ON(result); @@ -648,6 +648,7 @@ fail_alloc: static int dma_sb_region_create(struct ps3_dma_region *r) { int result; + u64 bus_addr; DBG(" -> %s:%d:\n", __func__, __LINE__); @@ -671,7 +672,8 @@ static int dma_sb_region_create(struct ps3_dma_region *r) result = lv1_allocate_device_dma_region(r->dev->bus_id, r->dev->dev_id, roundup_pow_of_two(r->len), r->page_size, r->region_type, - &r->bus_addr); + &bus_addr); + r->bus_addr = bus_addr; if (result) { DBG("%s:%d: lv1_allocate_device_dma_region failed: %s\n", @@ -685,6 +687,7 @@ static int dma_sb_region_create(struct ps3_dma_region *r) static int dma_ioc0_region_create(struct ps3_dma_region *r) { int result; + u64 bus_addr; INIT_LIST_HEAD(&r->chunk_list.head); spin_lock_init(&r->chunk_list.lock); @@ -692,7 +695,8 @@ static int dma_ioc0_region_create(struct ps3_dma_region *r) result = 
lv1_allocate_io_segment(0, r->len, r->page_size, - &r->bus_addr); + &bus_addr); + r->bus_addr = bus_addr; if (result) { DBG("%s:%d: lv1_allocate_io_segment failed: %s\n", __func__, __LINE__, ps3_result(result)); diff --git a/arch/powerpc/platforms/ps3/spu.c b/arch/powerpc/platforms/ps3/spu.c index ccae3d446b9..b3c6a993f9f 100644 --- a/arch/powerpc/platforms/ps3/spu.c +++ b/arch/powerpc/platforms/ps3/spu.c @@ -149,7 +149,7 @@ EXPORT_SYMBOL_GPL(ps3_get_spe_id); static unsigned long get_vas_id(void) { - unsigned long id; + u64 id; lv1_get_logical_ppe_id(&id); lv1_get_virtual_address_space_id_of_ppe(id, &id); @@ -160,14 +160,18 @@ static unsigned long get_vas_id(void) static int __init construct_spu(struct spu *spu) { int result; - unsigned long unused; + u64 unused; + u64 problem_phys; + u64 local_store_phys; result = lv1_construct_logical_spe(PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT, PAGE_SHIFT, get_vas_id(), SPE_TYPE_LOGICAL, - &spu_pdata(spu)->priv2_addr, &spu->problem_phys, - &spu->local_store_phys, &unused, + &spu_pdata(spu)->priv2_addr, &problem_phys, + &local_store_phys, &unused, &spu_pdata(spu)->shadow_addr, &spu_pdata(spu)->spe_id); + spu->problem_phys = problem_phys; + spu->local_store_phys = local_store_phys; if (result) { pr_debug("%s:%d: lv1_construct_logical_spe failed: %s\n", diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 70ef793e556..9bd4d677d3f 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -250,9 +250,11 @@ static void _dump_mmio_region(const struct ps3_mmio_region* r, static int ps3_sb_mmio_region_create(struct ps3_mmio_region *r) { int result; + u64 lpar_addr; result = lv1_map_device_mmio_region(r->dev->bus_id, r->dev->dev_id, - r->bus_addr, r->len, r->page_size, &r->lpar_addr); + r->bus_addr, r->len, r->page_size, &lpar_addr); + r->lpar_addr = lpar_addr; if (result) { pr_debug("%s:%d: lv1_map_device_mmio_region failed: %s\n", diff 
--git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 90c097a7a47..264c4475014 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -159,11 +159,13 @@ int ps3_vuart_get_triggers(struct ps3_system_bus_device *dev, struct vuart_triggers *trig) { int result; - unsigned long size; - unsigned long val; + u64 size; + u64 val; + u64 tx; result = lv1_get_virtual_uart_param(dev->port_number, - PARAM_TX_TRIGGER, &trig->tx); + PARAM_TX_TRIGGER, &tx); + trig->tx = tx; if (result) { dev_dbg(&dev->core, "%s:%d: tx_trigger failed: %s\n", @@ -201,7 +203,7 @@ int ps3_vuart_set_triggers(struct ps3_system_bus_device *dev, unsigned int tx, unsigned int rx) { int result; - unsigned long size; + u64 size; result = lv1_set_virtual_uart_param(dev->port_number, PARAM_TX_TRIGGER, tx); @@ -363,7 +365,7 @@ int ps3_vuart_disable_interrupt_disconnect(struct ps3_system_bus_device *dev) */ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, - const void *buf, unsigned int bytes, unsigned long *bytes_written) + const void *buf, unsigned int bytes, u64 *bytes_written) { int result; struct ps3_vuart_port_priv *priv = to_port_priv(dev); @@ -379,7 +381,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, priv->stats.bytes_written += *bytes_written; - dev_dbg(&dev->core, "%s:%d: wrote %lxh/%xh=>%lxh\n", __func__, __LINE__, + dev_dbg(&dev->core, "%s:%d: wrote %llxh/%xh=>%lxh\n", __func__, __LINE__, *bytes_written, bytes, priv->stats.bytes_written); return result; @@ -393,7 +395,7 @@ static int ps3_vuart_raw_write(struct ps3_system_bus_device *dev, */ static int ps3_vuart_raw_read(struct ps3_system_bus_device *dev, void *buf, - unsigned int bytes, unsigned long *bytes_read) + unsigned int bytes, u64 *bytes_read) { int result; struct ps3_vuart_port_priv *priv = to_port_priv(dev); @@ -411,7 +413,7 @@ static int ps3_vuart_raw_read(struct ps3_system_bus_device *dev, void *buf, priv->stats.bytes_read += *bytes_read; - dev_dbg(&dev->core, "%s:%d: read 
%lxh/%xh=>%lxh\n", __func__, __LINE__, + dev_dbg(&dev->core, "%s:%d: read %llxh/%xh=>%lxh\n", __func__, __LINE__, *bytes_read, bytes, priv->stats.bytes_read); return result; @@ -500,7 +502,7 @@ int ps3_vuart_write(struct ps3_system_bus_device *dev, const void *buf, spin_lock_irqsave(&priv->tx_list.lock, flags); if (list_empty(&priv->tx_list.head)) { - unsigned long bytes_written; + u64 bytes_written; result = ps3_vuart_raw_write(dev, buf, bytes, &bytes_written); @@ -745,7 +747,7 @@ static int ps3_vuart_handle_interrupt_tx(struct ps3_system_bus_device *dev) list_for_each_entry_safe(lb, n, &priv->tx_list.head, link) { - unsigned long bytes_written; + u64 bytes_written; result = ps3_vuart_raw_write(dev, lb->head, lb->tail - lb->head, &bytes_written); @@ -762,7 +764,7 @@ static int ps3_vuart_handle_interrupt_tx(struct ps3_system_bus_device *dev) if (bytes_written < lb->tail - lb->head) { lb->head += bytes_written; dev_dbg(&dev->core, - "%s:%d cleared buf_%lu, %lxh bytes\n", + "%s:%d cleared buf_%lu, %llxh bytes\n", __func__, __LINE__, lb->dbg_number, bytes_written); goto port_full; -- cgit From 46ca0d15394baf804fbb38694c651dd067c69458 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:00:29 +0000 Subject: powerpc/ps3: clear_bit()/set_bit() operate on unsigned longs This fixes these compiler warning: arch/powerpc/platforms/ps3/interrupt.c:109: warning: passing argument 2 of 'clear_bit' from incompatible pointer type arch/powerpc/platforms/ps3/interrupt.c:130: warning: passing argument 2 of 'set_bit' from incompatible pointer type Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/interrupt.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index b746792d84c..3ea6e51a7ff 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -60,6 
+60,8 @@ * gives a usable range of plug values of {NUM_ISA_INTERRUPTS..63}. Note * that there is no constraint on how many in this set an individual thread * can acquire. + * + * The mask is declared as unsigned long so we can use set/clear_bit on it. */ #define PS3_BMP_MINALIGN 64 @@ -68,7 +70,7 @@ struct ps3_bmp { struct { u64 status; u64 unused_1[3]; - u64 mask; + unsigned long mask; u64 unused_2[3]; }; u64 ipi_debug_brk_mask; -- cgit From 5418b9c671a99727667cd499a2acbebe66e172cc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:01:28 +0000 Subject: powerpc/ps3: ps3_repository_read_mm_info() takes u64 * arguments Fixes compiler warnings: arch/powerpc/platforms/ps3/mm.c:1205: warning: passing argument 2 of 'ps3_repository_read_mm_info' from incompatible pointer type arch/powerpc/platforms/ps3/mm.c:1205: warning: passing argument 3 of 'ps3_repository_read_mm_info' from incompatible pointer type Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/mm.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 5c8d066283e..715508cc7e0 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -80,7 +80,7 @@ enum { struct mem_region { u64 base; - unsigned long size; + u64 size; unsigned long offset; }; @@ -103,7 +103,7 @@ struct mem_region { */ struct map { - unsigned long total; + u64 total; u64 vas_id; u64 htab_size; struct mem_region rm; @@ -114,13 +114,13 @@ struct map { static void __maybe_unused _debug_dump_map(const struct map *m, const char *func, int line) { - DBG("%s:%d: map.total = %lxh\n", func, line, m->total); - DBG("%s:%d: map.rm.size = %lxh\n", func, line, m->rm.size); + DBG("%s:%d: map.total = %llxh\n", func, line, m->total); + DBG("%s:%d: map.rm.size = %llxh\n", func, line, m->rm.size); DBG("%s:%d: map.vas_id = %llu\n", func, line, 
m->vas_id); DBG("%s:%d: map.htab_size = %llxh\n", func, line, m->htab_size); DBG("%s:%d: map.r1.base = %llxh\n", func, line, m->r1.base); DBG("%s:%d: map.r1.offset = %lxh\n", func, line, m->r1.offset); - DBG("%s:%d: map.r1.size = %lxh\n", func, line, m->r1.size); + DBG("%s:%d: map.r1.size = %llxh\n", func, line, m->r1.size); } static struct map map; @@ -240,10 +240,9 @@ static int ps3_mm_region_create(struct mem_region *r, unsigned long size) r->size = _ALIGN_DOWN(size, 1 << PAGE_SHIFT_16M); DBG("%s:%d requested %lxh\n", __func__, __LINE__, size); - DBG("%s:%d actual %lxh\n", __func__, __LINE__, r->size); - DBG("%s:%d difference %lxh (%luMB)\n", __func__, __LINE__, - (unsigned long)(size - r->size), - (size - r->size) / 1024 / 1024); + DBG("%s:%d actual %llxh\n", __func__, __LINE__, r->size); + DBG("%s:%d difference %llxh (%lluMB)\n", __func__, __LINE__, + size - r->size, (size - r->size) / 1024 / 1024); if (r->size == 0) { DBG("%s:%d: size == 0\n", __func__, __LINE__); -- cgit From 5c949070c7a591d1001a5d8444731dfa4223b094 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:02:39 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion arch/powerpc Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/ps3/device-init.c | 26 +++++++++++++------------- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/ps3/interrupt.c | 16 ++++++++-------- arch/powerpc/platforms/ps3/mm.c | 10 +++++----- arch/powerpc/platforms/ps3/os-area.c | 2 +- arch/powerpc/platforms/ps3/repository.c | 22 +++++++++++----------- arch/powerpc/platforms/ps3/system-bus.c | 6 +++--- 7 files changed, 42 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index ca71a12b764..bb028f165fb 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -82,7 +82,7 @@ static int 
__init ps3_register_lpm_devices(void) goto fail_rights; } - pr_debug("%s:%d: pu_id %lu, rights %lu(%lxh)\n", + pr_debug("%s:%d: pu_id %llu, rights %llu(%llxh)\n", __func__, __LINE__, dev->lpm.pu_id, dev->lpm.rights, dev->lpm.rights); @@ -348,7 +348,7 @@ static int ps3_setup_storage_dev(const struct ps3_repository_device *repo, return -ENODEV; } - pr_debug("%s:%u: (%u:%u:%u): port %lu blk_size %lu num_blocks %lu " + pr_debug("%s:%u: (%u:%u:%u): port %llu blk_size %llu num_blocks %llu " "num_regions %u\n", __func__, __LINE__, repo->bus_index, repo->dev_index, repo->dev_type, port, blk_size, num_blocks, num_regions); @@ -394,7 +394,7 @@ static int ps3_setup_storage_dev(const struct ps3_repository_device *repo, result = -ENODEV; goto fail_read_region; } - pr_debug("%s:%u: region %u: id %u start %lu size %lu\n", + pr_debug("%s:%u: region %u: id %u start %llu size %llu\n", __func__, __LINE__, i, id, start, size); p->regions[i].id = id; @@ -662,13 +662,13 @@ static void ps3_find_and_add_device(u64 bus_id, u64 dev_id) if (rem) break; } - pr_warning("%s:%u: device %lu:%lu not found\n", __func__, __LINE__, + pr_warning("%s:%u: device %llu:%llu not found\n", __func__, __LINE__, bus_id, dev_id); return; found: if (retries) - pr_debug("%s:%u: device %lu:%lu found after %u retries\n", + pr_debug("%s:%u: device %llu:%llu found after %u retries\n", __func__, __LINE__, bus_id, dev_id, retries); ps3_setup_dynamic_device(&repo); @@ -715,14 +715,14 @@ static irqreturn_t ps3_notification_interrupt(int irq, void *data) res = lv1_storage_get_async_status(PS3_NOTIFICATION_DEV_ID, &tag, &status); if (tag != dev->tag) - pr_err("%s:%u: tag mismatch, got %lx, expected %lx\n", + pr_err("%s:%u: tag mismatch, got %llx, expected %llx\n", __func__, __LINE__, tag, dev->tag); if (res) { - pr_err("%s:%u: res %d status 0x%lx\n", __func__, __LINE__, res, + pr_err("%s:%u: res %d status 0x%llx\n", __func__, __LINE__, res, status); } else { - pr_debug("%s:%u: completed, status 0x%lx\n", __func__, + 
pr_debug("%s:%u: completed, status 0x%llx\n", __func__, __LINE__, status); dev->lv1_status = status; complete(&dev->done); @@ -761,7 +761,7 @@ static int ps3_notification_read_write(struct ps3_notification_device *dev, } if (dev->lv1_status) { - pr_err("%s:%u: %s not completed, status 0x%lx\n", __func__, + pr_err("%s:%u: %s not completed, status 0x%llx\n", __func__, __LINE__, op, dev->lv1_status); return -EIO; } @@ -850,16 +850,16 @@ static int ps3_probe_thread(void *data) if (res) break; - pr_debug("%s:%u: notify event type 0x%lx bus id %lu dev id %lu" - " type %lu port %lu\n", __func__, __LINE__, + pr_debug("%s:%u: notify event type 0x%llx bus id %llu dev id %llu" + " type %llu port %llu\n", __func__, __LINE__, notify_event->event_type, notify_event->bus_id, notify_event->dev_id, notify_event->dev_type, notify_event->dev_port); if (notify_event->event_type != notify_region_probe || notify_event->bus_id != dev.sbd.bus_id) { - pr_warning("%s:%u: bad notify_event: event %lu, " - "dev_id %lu, dev_type %lu\n", + pr_warning("%s:%u: bad notify_event: event %llu, " + "dev_id %llu, dev_type %llu\n", __func__, __LINE__, notify_event->event_type, notify_event->dev_id, notify_event->dev_type); diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 6eb1d4d182c..1e8a1e39dfe 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -75,7 +75,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long va, if (result) { /* all entries bolted !*/ - pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%lx r=%lx\n", + pr_info("%s:result=%d va=%lx pa=%lx ix=%lx v=%llx r=%llx\n", __func__, result, va, pa, hpte_group, hpte_v, hpte_r); BUG(); } diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 3ea6e51a7ff..8ec5ccf76b1 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -104,7 +104,7 @@ static void ps3_chip_mask(unsigned 
int virq) struct ps3_private *pd = get_irq_chip_data(virq); unsigned long flags; - pr_debug("%s:%d: thread_id %lu, virq %d\n", __func__, __LINE__, + pr_debug("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, pd->thread_id, virq); local_irq_save(flags); @@ -125,7 +125,7 @@ static void ps3_chip_unmask(unsigned int virq) struct ps3_private *pd = get_irq_chip_data(virq); unsigned long flags; - pr_debug("%s:%d: thread_id %lu, virq %d\n", __func__, __LINE__, + pr_debug("%s:%d: thread_id %llu, virq %d\n", __func__, __LINE__, pd->thread_id, virq); local_irq_save(flags); @@ -223,7 +223,7 @@ static int ps3_virq_destroy(unsigned int virq) { const struct ps3_private *pd = get_irq_chip_data(virq); - pr_debug("%s:%d: ppe_id %lu, thread_id %lu, virq %u\n", __func__, + pr_debug("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, __LINE__, pd->ppe_id, pd->thread_id, virq); set_irq_chip_data(virq, NULL); @@ -293,7 +293,7 @@ int ps3_irq_plug_destroy(unsigned int virq) int result; const struct ps3_private *pd = get_irq_chip_data(virq); - pr_debug("%s:%d: ppe_id %lu, thread_id %lu, virq %u\n", __func__, + pr_debug("%s:%d: ppe_id %llu, thread_id %llu, virq %u\n", __func__, __LINE__, pd->ppe_id, pd->thread_id, virq); ps3_chip_mask(virq); @@ -693,7 +693,7 @@ void __init ps3_register_ipi_debug_brk(unsigned int cpu, unsigned int virq) pd->bmp.ipi_debug_brk_mask = 0x8000000000000000UL >> virq; - pr_debug("%s:%d: cpu %u, virq %u, mask %lxh\n", __func__, __LINE__, + pr_debug("%s:%d: cpu %u, virq %u, mask %llxh\n", __func__, __LINE__, cpu, virq, pd->bmp.ipi_debug_brk_mask); } @@ -712,7 +712,7 @@ static unsigned int ps3_get_irq(void) plug &= 0x3f; if (unlikely(plug == NO_IRQ)) { - pr_debug("%s:%d: no plug found: thread_id %lu\n", __func__, + pr_debug("%s:%d: no plug found: thread_id %llu\n", __func__, __LINE__, pd->thread_id); dump_bmp(&per_cpu(ps3_private, 0)); dump_bmp(&per_cpu(ps3_private, 1)); @@ -747,7 +747,7 @@ void __init ps3_init_IRQ(void) pd->thread_id = 
get_hard_smp_processor_id(cpu); spin_lock_init(&pd->bmp.lock); - pr_debug("%s:%d: ppe_id %lu, thread_id %lu, bmp %lxh\n", + pr_debug("%s:%d: ppe_id %llu, thread_id %llu, bmp %lxh\n", __func__, __LINE__, pd->ppe_id, pd->thread_id, ps3_mm_phys_to_lpar(__pa(&pd->bmp))); @@ -772,6 +772,6 @@ void ps3_shutdown_IRQ(int cpu) lv1_get_logical_ppe_id(&ppe_id); result = lv1_configure_irq_state_bitmap(ppe_id, thread_id, 0); - DBG("%s:%d: lv1_configure_irq_state_bitmap (%lu:%lu/%d) %s\n", __func__, + DBG("%s:%d: lv1_configure_irq_state_bitmap (%llu:%llu/%d) %s\n", __func__, __LINE__, ppe_id, thread_id, cpu, ps3_result(result)); } diff --git a/arch/powerpc/platforms/ps3/mm.c b/arch/powerpc/platforms/ps3/mm.c index 715508cc7e0..67de6bf3db3 100644 --- a/arch/powerpc/platforms/ps3/mm.c +++ b/arch/powerpc/platforms/ps3/mm.c @@ -354,7 +354,7 @@ static unsigned long dma_sb_lpar_to_bus(struct ps3_dma_region *r, static void __maybe_unused _dma_dump_region(const struct ps3_dma_region *r, const char *func, int line) { - DBG("%s:%d: dev %lu:%lu\n", func, line, r->dev->bus_id, + DBG("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id, r->dev->dev_id); DBG("%s:%d: page_size %u\n", func, line, r->page_size); DBG("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr); @@ -389,7 +389,7 @@ struct dma_chunk { static void _dma_dump_chunk (const struct dma_chunk* c, const char* func, int line) { - DBG("%s:%d: r.dev %lu:%lu\n", func, line, + DBG("%s:%d: r.dev %llu:%llu\n", func, line, c->region->dev->bus_id, c->region->dev->dev_id); DBG("%s:%d: r.bus_addr %lxh\n", func, line, c->region->bus_addr); DBG("%s:%d: r.page_size %u\n", func, line, c->region->page_size); @@ -595,7 +595,7 @@ static int dma_ioc0_map_pages(struct ps3_dma_region *r, unsigned long phys_addr, /* build ioptes for the area */ pages = len >> r->page_size; - DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#lx\n", __func__, + DBG("%s: pgsize=%#x len=%#lx pages=%#x iopteflag=%#llx\n", __func__, r->page_size, r->len, pages, iopte_flag); 
for (iopage = 0; iopage < pages; iopage++) { offset = (1 << r->page_size) * iopage; @@ -654,7 +654,7 @@ static int dma_sb_region_create(struct ps3_dma_region *r) BUG_ON(!r); if (!r->dev->bus_id) { - pr_info("%s:%d: %lu:%lu no dma\n", __func__, __LINE__, + pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__, r->dev->bus_id, r->dev->dev_id); return 0; } @@ -723,7 +723,7 @@ static int dma_sb_region_free(struct ps3_dma_region *r) BUG_ON(!r); if (!r->dev->bus_id) { - pr_info("%s:%d: %lu:%lu no dma\n", __func__, __LINE__, + pr_info("%s:%d: %llu:%llu no dma\n", __func__, __LINE__, r->dev->bus_id, r->dev->dev_id); return 0; } diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index 1d201782d4e..e1c83c23b43 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -306,7 +306,7 @@ static void _dump_params(const struct os_area_params *p, const char *func, { pr_debug("%s:%d: p.boot_flag: %u\n", func, line, p->boot_flag); pr_debug("%s:%d: p.num_params: %u\n", func, line, p->num_params); - pr_debug("%s:%d: p.rtc_diff %ld\n", func, line, p->rtc_diff); + pr_debug("%s:%d: p.rtc_diff %lld\n", func, line, p->rtc_diff); pr_debug("%s:%d: p.av_multi_out %u\n", func, line, p->av_multi_out); pr_debug("%s:%d: p.ctrl_button: %u\n", func, line, p->ctrl_button); pr_debug("%s:%d: p.static_ip_addr: %u.%u.%u.%u\n", func, line, diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 22063adeb38..5e304c292f6 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -44,7 +44,7 @@ static void _dump_field(const char *hdr, u64 n, const char *func, int line) s[i] = (in[i] <= 126 && in[i] >= 32) ? 
in[i] : '.'; s[i] = 0; - pr_debug("%s:%d: %s%016lx : %s\n", func, line, hdr, n, s); + pr_debug("%s:%d: %s%016llx : %s\n", func, line, hdr, n, s); #endif } @@ -70,8 +70,8 @@ static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, _dump_field("n2: ", n2, func, line); _dump_field("n3: ", n3, func, line); _dump_field("n4: ", n4, func, line); - pr_debug("%s:%d: v1: %016lx\n", func, line, v1); - pr_debug("%s:%d: v2: %016lx\n", func, line, v2); + pr_debug("%s:%d: v1: %016llx\n", func, line, v1); + pr_debug("%s:%d: v2: %016llx\n", func, line, v2); } /** @@ -149,10 +149,10 @@ static int read_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, *_v2 = v2; if (v1 && !_v1) - pr_debug("%s:%d: warning: discarding non-zero v1: %016lx\n", + pr_debug("%s:%d: warning: discarding non-zero v1: %016llx\n", __func__, __LINE__, v1); if (v2 && !_v2) - pr_debug("%s:%d: warning: discarding non-zero v2: %016lx\n", + pr_debug("%s:%d: warning: discarding non-zero v2: %016llx\n", __func__, __LINE__, v2); return 0; @@ -327,7 +327,7 @@ int ps3_repository_find_device(struct ps3_repository_device *repo) return result; } - pr_debug("%s:%d: bus_type %u, bus_index %u, bus_id %lu, num_dev %u\n", + pr_debug("%s:%d: bus_type %u, bus_index %u, bus_id %llu, num_dev %u\n", __func__, __LINE__, tmp.bus_type, tmp.bus_index, tmp.bus_id, num_dev); @@ -353,7 +353,7 @@ int ps3_repository_find_device(struct ps3_repository_device *repo) return result; } - pr_debug("%s:%d: found: dev_type %u, dev_index %u, dev_id %lu\n", + pr_debug("%s:%d: found: dev_type %u, dev_index %u, dev_id %llu\n", __func__, __LINE__, tmp.dev_type, tmp.dev_index, tmp.dev_id); *repo = tmp; @@ -367,7 +367,7 @@ int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, struct ps3_repository_device tmp; unsigned int num_dev; - pr_debug(" -> %s:%u: find device by id %lu:%lu\n", __func__, __LINE__, + pr_debug(" -> %s:%u: find device by id %llu:%llu\n", __func__, __LINE__, bus_id, dev_id); for (tmp.bus_index = 
0; tmp.bus_index < 10; tmp.bus_index++) { @@ -382,7 +382,7 @@ int ps3_repository_find_device_by_id(struct ps3_repository_device *repo, if (tmp.bus_id == bus_id) goto found_bus; - pr_debug("%s:%u: skip, bus_id %lu\n", __func__, __LINE__, + pr_debug("%s:%u: skip, bus_id %llu\n", __func__, __LINE__, tmp.bus_id); } pr_debug(" <- %s:%u: bus not found\n", __func__, __LINE__); @@ -416,7 +416,7 @@ found_bus: if (tmp.dev_id == dev_id) goto found_dev; - pr_debug("%s:%u: skip, dev_id %lu\n", __func__, __LINE__, + pr_debug("%s:%u: skip, dev_id %llu\n", __func__, __LINE__, tmp.dev_id); } pr_debug(" <- %s:%u: dev not found\n", __func__, __LINE__); @@ -430,7 +430,7 @@ found_dev: return result; } - pr_debug(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%lu:%lu)\n", + pr_debug(" <- %s:%u: found: type (%u:%u) index (%u:%u) id (%llu:%llu)\n", __func__, __LINE__, tmp.bus_type, tmp.dev_type, tmp.bus_index, tmp.dev_index, tmp.bus_id, tmp.dev_id); *repo = tmp; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 9bd4d677d3f..58311a86785 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -182,7 +182,7 @@ int ps3_open_hv_device(struct ps3_system_bus_device *dev) case PS3_MATCH_ID_SYSTEM_MANAGER: pr_debug("%s:%d: unsupported match_id: %u\n", __func__, __LINE__, dev->match_id); - pr_debug("%s:%d: bus_id: %lu\n", __func__, __LINE__, + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, dev->bus_id); BUG(); return -EINVAL; @@ -220,7 +220,7 @@ int ps3_close_hv_device(struct ps3_system_bus_device *dev) case PS3_MATCH_ID_SYSTEM_MANAGER: pr_debug("%s:%d: unsupported match_id: %u\n", __func__, __LINE__, dev->match_id); - pr_debug("%s:%d: bus_id: %lu\n", __func__, __LINE__, + pr_debug("%s:%d: bus_id: %llu\n", __func__, __LINE__, dev->bus_id); BUG(); return -EINVAL; @@ -240,7 +240,7 @@ EXPORT_SYMBOL_GPL(ps3_close_hv_device); static void _dump_mmio_region(const struct ps3_mmio_region* r, const 
char* func, int line) { - pr_debug("%s:%d: dev %lu:%lu\n", func, line, r->dev->bus_id, + pr_debug("%s:%d: dev %llu:%llu\n", func, line, r->dev->bus_id, r->dev->dev_id); pr_debug("%s:%d: bus_addr %lxh\n", func, line, r->bus_addr); pr_debug("%s:%d: len %lxh\n", func, line, r->len); -- cgit From e377c6e24d1b465ebd18fe49d0dc06932ced9bb0 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:04:48 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion drivers/block Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- drivers/block/ps3disk.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/block/ps3disk.c b/drivers/block/ps3disk.c index 936466f62af..bccc42bb921 100644 --- a/drivers/block/ps3disk.c +++ b/drivers/block/ps3disk.c @@ -141,7 +141,7 @@ static int ps3disk_submit_request_sg(struct ps3_storage_device *dev, start_sector = req->sector * priv->blocking_factor; sectors = req->nr_sectors * priv->blocking_factor; - dev_dbg(&dev->sbd.core, "%s:%u: %s %lu sectors starting at %lu\n", + dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", __func__, __LINE__, op, sectors, start_sector); if (write) { @@ -178,7 +178,7 @@ static int ps3disk_submit_flush_request(struct ps3_storage_device *dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0, &dev->tag); if (res) { - dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); end_request(req, 0); return 0; @@ -238,11 +238,11 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) if (tag != dev->tag) dev_err(&dev->sbd.core, - "%s:%u: tag mismatch, got %lx, expected %lx\n", + "%s:%u: tag mismatch, got %llx, expected %llx\n", __func__, __LINE__, tag, dev->tag); if (res) { - dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", __func__, __LINE__, res, 
status); return IRQ_HANDLED; } @@ -269,7 +269,7 @@ static irqreturn_t ps3disk_interrupt(int irq, void *data) op = read ? "read" : "write"; } if (status) { - dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%lx\n", __func__, + dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, __LINE__, op, status); error = -EIO; } else { @@ -297,7 +297,7 @@ static int ps3disk_sync_cache(struct ps3_storage_device *dev) res = ps3stor_send_command(dev, LV1_STORAGE_ATA_HDDOUT, 0, 0, 0, 0); if (res) { - dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: sync cache failed 0x%llx\n", __func__, __LINE__, res); return -EIO; } @@ -388,7 +388,7 @@ static int ps3disk_identify(struct ps3_storage_device *dev) sizeof(ata_cmnd), ata_cmnd.buffer, ata_cmnd.arglen); if (res) { - dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: identify disk failed 0x%llx\n", __func__, __LINE__, res); return -EIO; } @@ -426,7 +426,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) if (dev->blk_size < 512) { dev_err(&dev->sbd.core, - "%s:%u: cannot handle block size %lu\n", __func__, + "%s:%u: cannot handle block size %llu\n", __func__, __LINE__, dev->blk_size); return -EINVAL; } @@ -512,7 +512,7 @@ static int __devinit ps3disk_probe(struct ps3_system_bus_device *_dev) dev->regions[dev->region_idx].size*priv->blocking_factor); dev_info(&dev->sbd.core, - "%s is a %s (%lu MiB total, %lu MiB for OtherOS)\n", + "%s is a %s (%llu MiB total, %lu MiB for OtherOS)\n", gendisk->disk_name, priv->model, priv->raw_capacity >> 11, get_capacity(gendisk) >> 11); -- cgit From 4c33d2dc34f50e5d5436e62cd99395859f157a19 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:06:02 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion drivers/char Also a couple of min -> min_t changes. 
Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- drivers/char/ps3flash.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 79b6f461be7..afbe45676d7 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -44,7 +44,7 @@ static ssize_t ps3flash_read_write_sectors(struct ps3_storage_device *dev, u64 res = ps3stor_read_write_sectors(dev, lpar, start_sector, sectors, write); if (res) { - dev_err(&dev->sbd.core, "%s:%u: %s failed 0x%lx\n", __func__, + dev_err(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, __LINE__, write ? "write" : "read", res); return -EIO; } @@ -59,7 +59,7 @@ static ssize_t ps3flash_read_sectors(struct ps3_storage_device *dev, max_sectors = dev->bounce_size / dev->blk_size; if (sectors > max_sectors) { - dev_dbg(&dev->sbd.core, "%s:%u Limiting sectors to %lu\n", + dev_dbg(&dev->sbd.core, "%s:%u Limiting sectors to %llu\n", __func__, __LINE__, max_sectors); sectors = max_sectors; } @@ -144,7 +144,7 @@ static ssize_t ps3flash_read(struct file *file, char __user *buf, size_t count, goto fail; } - n = min(remaining, sectors_read*dev->blk_size-offset); + n = min_t(u64, remaining, sectors_read*dev->blk_size-offset); dev_dbg(&dev->sbd.core, "%s:%u: copy %lu bytes from 0x%p to user 0x%p\n", __func__, __LINE__, n, dev->bounce_buf+offset, buf); @@ -225,7 +225,7 @@ static ssize_t ps3flash_write(struct file *file, const char __user *buf, if (end_read_sector >= start_read_sector) { /* Merge head and tail */ dev_dbg(&dev->sbd.core, - "Merged head and tail: %lu sectors at %lu\n", + "Merged head and tail: %llu sectors at %llu\n", chunk_sectors, start_write_sector); res = ps3flash_read_sectors(dev, start_write_sector, chunk_sectors, 0); @@ -235,7 +235,7 @@ static ssize_t ps3flash_write(struct file *file, const char __user *buf, if (head) { /* Read head */ dev_dbg(&dev->sbd.core, - "head: %lu sectors at %lu\n", 
head, + "head: %llu sectors at %llu\n", head, start_write_sector); res = ps3flash_read_sectors(dev, start_write_sector, @@ -247,7 +247,7 @@ static ssize_t ps3flash_write(struct file *file, const char __user *buf, start_write_sector+chunk_sectors) { /* Read tail */ dev_dbg(&dev->sbd.core, - "tail: %lu sectors at %lu\n", tail, + "tail: %llu sectors at %llu\n", tail, start_read_sector); sec_off = start_read_sector-start_write_sector; res = ps3flash_read_sectors(dev, @@ -258,7 +258,7 @@ static ssize_t ps3flash_write(struct file *file, const char __user *buf, } } - n = min(remaining, dev->bounce_size-offset); + n = min_t(u64, remaining, dev->bounce_size-offset); dev_dbg(&dev->sbd.core, "%s:%u: copy %lu bytes from user 0x%p to 0x%p\n", __func__, __LINE__, n, buf, dev->bounce_buf+offset); @@ -299,11 +299,11 @@ static irqreturn_t ps3flash_interrupt(int irq, void *data) if (tag != dev->tag) dev_err(&dev->sbd.core, - "%s:%u: tag mismatch, got %lx, expected %lx\n", + "%s:%u: tag mismatch, got %llx, expected %llx\n", __func__, __LINE__, tag, dev->tag); if (res) { - dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", __func__, __LINE__, res, status); } else { dev->lv1_status = status; -- cgit From 26db11af12fb58dde1d6327035a248a99f60f112 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:07:55 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion sound/ppc Signed-off-by: Stephen Rothwell Acked-by: Takashi Iwai Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- sound/ppc/snd_ps3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/ppc/snd_ps3.c b/sound/ppc/snd_ps3.c index 8f9e3859c37..ff321110ec0 100644 --- a/sound/ppc/snd_ps3.c +++ b/sound/ppc/snd_ps3.c @@ -477,7 +477,7 @@ static int snd_ps3_pcm_prepare(struct snd_pcm_substream *substream) card->dma_start_bus_addr[SND_PS3_CH_R] = runtime->dma_addr + (runtime->dma_bytes / 2); - 
pr_debug("%s: vaddr=%p bus=%#llx\n", __func__, card->dma_start_vaddr[SND_PS3_CH_L], card->dma_start_bus_addr[SND_PS3_CH_L]); @@ -1030,7 +1030,7 @@ static int __init snd_ps3_driver_probe(struct ps3_system_bus_device *dev) pr_info("%s: nullbuffer alloc failed\n", __func__); goto clean_preallocate; } - pr_debug("%s: null vaddr=%p dma=%#lx\n", __func__, + pr_debug("%s: null vaddr=%p dma=%#llx\n", __func__, the_card.null_buffer_start_vaddr, the_card.null_buffer_start_dma_addr); /* set default sample rate/word width */ -- cgit From a9dad6e598155e2a548142336cd833e5360335d1 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:10:06 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion drivers/ps3 Also some min -> min_t conversions. Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- drivers/ps3/ps3-lpm.c | 16 ++++++++-------- drivers/ps3/ps3-vuart.c | 8 ++++---- drivers/ps3/ps3stor_lib.c | 14 +++++++------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 204158cf7a5..fe96793e3f0 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -732,7 +732,7 @@ static u64 pm_signal_group_to_ps3_lv1_signal_group(u64 group) case 8: return pm_translate_signal_group_number_on_island8(subgroup); default: - dev_dbg(sbd_core(), "%s:%u: island not found: %lu\n", __func__, + dev_dbg(sbd_core(), "%s:%u: island not found: %llu\n", __func__, __LINE__, group); BUG(); break; @@ -765,7 +765,7 @@ static int __ps3_set_signal(u64 lv1_signal_group, u64 bus_select, signal_select, attr1, attr2, attr3); if (ret) dev_err(sbd_core(), - "%s:%u: error:%d 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + "%s:%u: error:%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", __func__, __LINE__, ret, lv1_signal_group, bus_select, signal_select, attr1, attr2, attr3); @@ -908,7 +908,7 @@ void ps3_disable_pm(u32 cpu)
lpm_priv->tb_count = tmp; - dev_dbg(sbd_core(), "%s:%u: tb_count %lu (%lxh)\n", __func__, __LINE__, + dev_dbg(sbd_core(), "%s:%u: tb_count %llu (%llxh)\n", __func__, __LINE__, lpm_priv->tb_count, lpm_priv->tb_count); } EXPORT_SYMBOL_GPL(ps3_disable_pm); @@ -938,7 +938,7 @@ int ps3_lpm_copy_tb(unsigned long offset, void *buf, unsigned long count, if (offset >= lpm_priv->tb_count) return 0; - count = min(count, lpm_priv->tb_count - offset); + count = min_t(u64, count, lpm_priv->tb_count - offset); while (*bytes_copied < count) { const unsigned long request = count - *bytes_copied; @@ -993,7 +993,7 @@ int ps3_lpm_copy_tb_to_user(unsigned long offset, void __user *buf, if (offset >= lpm_priv->tb_count) return 0; - count = min(count, lpm_priv->tb_count - offset); + count = min_t(u64, count, lpm_priv->tb_count - offset); while (*bytes_copied < count) { const unsigned long request = count - *bytes_copied; @@ -1013,7 +1013,7 @@ int ps3_lpm_copy_tb_to_user(unsigned long offset, void __user *buf, result = copy_to_user(buf, lpm_priv->tb_cache, tmp); if (result) { - dev_dbg(sbd_core(), "%s:%u: 0x%lx bytes at 0x%p\n", + dev_dbg(sbd_core(), "%s:%u: 0x%llx bytes at 0x%p\n", __func__, __LINE__, tmp, buf); dev_err(sbd_core(), "%s:%u: copy_to_user failed: %d\n", __func__, __LINE__, result); @@ -1148,8 +1148,8 @@ int ps3_lpm_open(enum ps3_lpm_tb_type tb_type, void *tb_cache, lpm_priv->shadow.group_control = PS3_LPM_SHADOW_REG_INIT; lpm_priv->shadow.debug_bus_control = PS3_LPM_SHADOW_REG_INIT; - dev_dbg(sbd_core(), "%s:%u: lpm_id 0x%lx, outlet_id 0x%lx, " - "tb_size 0x%lx\n", __func__, __LINE__, lpm_priv->lpm_id, + dev_dbg(sbd_core(), "%s:%u: lpm_id 0x%llx, outlet_id 0x%llx, " + "tb_size 0x%llx\n", __func__, __LINE__, lpm_priv->lpm_id, lpm_priv->outlet_id, tb_size); return 0; diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 264c4475014..e4ad5ba5d0a 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -114,7 +114,7 @@ struct ports_bmp { static void 
__maybe_unused _dump_ports_bmp( const struct ports_bmp *bmp, const char *func, int line) { - pr_debug("%s:%d: ports_bmp: %016lxh\n", func, line, bmp->status); + pr_debug("%s:%d: ports_bmp: %016llxh\n", func, line, bmp->status); } #define dump_port_params(_b) _dump_port_params(_b, __func__, __LINE__) @@ -250,7 +250,7 @@ static int ps3_vuart_get_rx_bytes_waiting(struct ps3_system_bus_device *dev, dev_dbg(&dev->core, "%s:%d: rx_bytes failed: %s\n", __func__, __LINE__, ps3_result(result)); - dev_dbg(&dev->core, "%s:%d: %lxh\n", __func__, __LINE__, + dev_dbg(&dev->core, "%s:%d: %llxh\n", __func__, __LINE__, *bytes_waiting); return result; } @@ -297,7 +297,7 @@ static int ps3_vuart_get_interrupt_status(struct ps3_system_bus_device *dev, *status = tmp & priv->interrupt_mask; - dev_dbg(&dev->core, "%s:%d: m %lxh, s %lxh, m&s %lxh\n", + dev_dbg(&dev->core, "%s:%d: m %llxh, s %llxh, m&s %lxh\n", __func__, __LINE__, priv->interrupt_mask, tmp, *status); return result; @@ -594,7 +594,7 @@ static int ps3_vuart_queue_rx_bytes(struct ps3_system_bus_device *dev, list_add_tail(&lb->link, &priv->rx_list.head); priv->rx_list.bytes_held += bytes; - dev_dbg(&dev->core, "%s:%d: buf_%lu: queued %lxh bytes\n", + dev_dbg(&dev->core, "%s:%d: buf_%lu: queued %llxh bytes\n", __func__, __LINE__, lb->dbg_number, bytes); *bytes_queued = bytes; diff --git a/drivers/ps3/ps3stor_lib.c b/drivers/ps3/ps3stor_lib.c index 55955f16ad9..18066d55539 100644 --- a/drivers/ps3/ps3stor_lib.c +++ b/drivers/ps3/ps3stor_lib.c @@ -70,7 +70,7 @@ static int ps3stor_probe_access(struct ps3_storage_device *dev) __func__, __LINE__, n); dev->region_idx = __ffs(dev->accessible_regions); dev_info(&dev->sbd.core, - "First accessible region has index %u start %lu size %lu\n", + "First accessible region has index %u start %llu size %llu\n", dev->region_idx, dev->regions[dev->region_idx].start, dev->regions[dev->region_idx].size); @@ -220,7 +220,7 @@ u64 ps3stor_read_write_sectors(struct ps3_storage_device *dev, u64 lpar, 
const char *op = write ? "write" : "read"; int res; - dev_dbg(&dev->sbd.core, "%s:%u: %s %lu sectors starting at %lu\n", + dev_dbg(&dev->sbd.core, "%s:%u: %s %llu sectors starting at %llu\n", __func__, __LINE__, op, sectors, start_sector); init_completion(&dev->done); @@ -238,7 +238,7 @@ u64 ps3stor_read_write_sectors(struct ps3_storage_device *dev, u64 lpar, wait_for_completion(&dev->done); if (dev->lv1_status) { - dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%lx\n", __func__, + dev_dbg(&dev->sbd.core, "%s:%u: %s failed 0x%llx\n", __func__, __LINE__, op, dev->lv1_status); return dev->lv1_status; } @@ -268,7 +268,7 @@ u64 ps3stor_send_command(struct ps3_storage_device *dev, u64 cmd, u64 arg1, { int res; - dev_dbg(&dev->sbd.core, "%s:%u: send device command 0x%lx\n", __func__, + dev_dbg(&dev->sbd.core, "%s:%u: send device command 0x%llx\n", __func__, __LINE__, cmd); init_completion(&dev->done); @@ -277,19 +277,19 @@ u64 ps3stor_send_command(struct ps3_storage_device *dev, u64 cmd, u64 arg1, arg2, arg3, arg4, &dev->tag); if (res) { dev_err(&dev->sbd.core, - "%s:%u: send_device_command 0x%lx failed %d\n", + "%s:%u: send_device_command 0x%llx failed %d\n", __func__, __LINE__, cmd, res); return -1; } wait_for_completion(&dev->done); if (dev->lv1_status) { - dev_dbg(&dev->sbd.core, "%s:%u: command 0x%lx failed 0x%lx\n", + dev_dbg(&dev->sbd.core, "%s:%u: command 0x%llx failed 0x%llx\n", __func__, __LINE__, cmd, dev->lv1_status); return dev->lv1_status; } - dev_dbg(&dev->sbd.core, "%s:%u: command 0x%lx completed\n", __func__, + dev_dbg(&dev->sbd.core, "%s:%u: command 0x%llx completed\n", __func__, __LINE__, cmd); return 0; -- cgit From 7ad489e3d2459f7520d9062cb855ff29a8dd683d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:11:44 +0000 Subject: powerpc/ps3: Printing fixups for l64 to ll64 conversion drivers/scsi Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- drivers/scsi/ps3rom.c | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ps3rom.c b/drivers/scsi/ps3rom.c index ce48e2d0193..ca0dd33497e 100644 --- a/drivers/scsi/ps3rom.c +++ b/drivers/scsi/ps3rom.c @@ -290,11 +290,11 @@ static irqreturn_t ps3rom_interrupt(int irq, void *data) if (tag != dev->tag) dev_err(&dev->sbd.core, - "%s:%u: tag mismatch, got %lx, expected %lx\n", + "%s:%u: tag mismatch, got %llx, expected %llx\n", __func__, __LINE__, tag, dev->tag); if (res) { - dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%lx\n", + dev_err(&dev->sbd.core, "%s:%u: res=%d status=0x%llx\n", __func__, __LINE__, res, status); return IRQ_HANDLED; } @@ -364,7 +364,7 @@ static int __devinit ps3rom_probe(struct ps3_system_bus_device *_dev) if (dev->blk_size != CD_FRAMESIZE) { dev_err(&dev->sbd.core, - "%s:%u: cannot handle block size %lu\n", __func__, + "%s:%u: cannot handle block size %llu\n", __func__, __LINE__, dev->blk_size); return -EINVAL; } -- cgit From 5d9ee3ff3dbf815cd9ee18c166bb9b538e8057d2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 13 Jan 2009 20:13:39 +0000 Subject: powerpc/ps3: printing fixups for l64 to ll64 conversion drivers/video Signed-off-by: Stephen Rothwell Acked-by: Geoff Levand Signed-off-by: Benjamin Herrenschmidt --- drivers/video/ps3fb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/ps3fb.c b/drivers/video/ps3fb.c index 38ac805db97..87f826e4c95 100644 --- a/drivers/video/ps3fb.c +++ b/drivers/video/ps3fb.c @@ -1006,7 +1006,7 @@ static int ps3fb_xdr_settings(u64 xdr_lpar, struct device *dev) __func__, status); return -ENXIO; } - dev_dbg(dev, "video:%p ioif:%lx lpar:%lx size:%lx\n", + dev_dbg(dev, "video:%p ioif:%lx lpar:%llx size:%lx\n", ps3fb_videomemory.address, GPU_IOIF, xdr_lpar, ps3fb_videomemory.size); @@ -1133,7 +1133,7 @@ static int __devinit ps3fb_probe(struct ps3_system_bus_device *dev) __func__, status); goto err; } - dev_dbg(&dev->core, "ddr:lpar:0x%lx\n", ddr_lpar); + dev_dbg(&dev->core, 
"ddr:lpar:0x%llx\n", ddr_lpar); status = lv1_gpu_context_allocate(ps3fb.memory_handle, 0, &ps3fb.context_handle, -- cgit From 9ba0fdbfaed2e74005d87fab948c5522b86ff733 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp Date: Wed, 14 Jan 2009 09:09:34 +0000 Subject: powerpc: is_hugepage_only_range() must account for both 4kB and 64kB slices powerpc: is_hugepage_only_range() must account for both 4kB and 64kB slices The subpage_prot syscall fails on second and subsequent calls for a given region, because is_hugepage_only_range() is mis-identifying the 4 kB slices when the process has a 64 kB page size. Signed-off-by: Dave Kleikamp Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/slice.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c index db44e02e045..ba5194817f8 100644 --- a/arch/powerpc/mm/slice.c +++ b/arch/powerpc/mm/slice.c @@ -710,9 +710,18 @@ int is_hugepage_only_range(struct mm_struct *mm, unsigned long addr, unsigned long len) { struct slice_mask mask, available; + unsigned int psize = mm->context.user_psize; mask = slice_range_to_mask(addr, len); - available = slice_mask_for_size(mm, mm->context.user_psize); + available = slice_mask_for_size(mm, psize); +#ifdef CONFIG_PPC_64K_PAGES + /* We need to account for 4k slices too */ + if (psize == MMU_PAGE_64K) { + struct slice_mask compat_mask; + compat_mask = slice_mask_for_size(mm, MMU_PAGE_4K); + or_mask(available, compat_mask); + } +#endif #if 0 /* too verbose */ slice_dbg("is_hugepage_only_range(mm=%p, addr=%lx, len=%lx)\n", -- cgit From b60c31d85a2a2f1a9c8623f1fb56e2688aa1e2b1 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Wed, 14 Jan 2009 13:42:41 +0000 Subject: powerpc: Get the number of SLBs from "slb-size" property The PAPR says that the property for specifying the number of SLBs should be called "slb-size". We currently only look for "ibm,slb-size" because this is what firmware actually presents. 
This patch makes us look for the "slb-size" property as well and in preference to the "ibm,slb-size". This should future proof us if firmware changes to match PAPR. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index c09cffafb6e..f00f83109ab 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -590,6 +590,11 @@ static void __init check_cpu_slb_size(unsigned long node) { u32 *slb_size_ptr; + slb_size_ptr = of_get_flat_dt_prop(node, "slb-size", NULL); + if (slb_size_ptr != NULL) { + mmu_slb_size = *slb_size_ptr; + return; + } slb_size_ptr = of_get_flat_dt_prop(node, "ibm,slb-size", NULL); if (slb_size_ptr != NULL) { mmu_slb_size = *slb_size_ptr; -- cgit From 7f8030d166f84b7413a0393819cb8b800484d5a9 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 14 Jan 2009 23:15:44 +0000 Subject: hvc_console: Remove tty->low_latency This patch removes the tty->low_latency setting. For irq based hvc_console backends the tty->low_latency must be set to 0, because the tty_flip_buffer_push() function must not be called from IRQ context (see drivers/char/tty_buffer.c). For polled backends, the low_latency setting causes the bug trace below, because tty_flip_buffer_push() is called within an atomic context and subsequent calls might sleep due to mutex_lock. 
BUG: sleeping function called from invalid context at /root/cvs/linux-2.6.git/kernel/mutex.c:207 in_atomic(): 1, irqs_disabled(): 0, pid: 748, name: khvcd 1 lock held by khvcd/748: #0: (hvc_structs_lock){--..}, at: [<00000000002ceb50>] khvcd+0x58/0x12c CPU: 0 Not tainted 2.6.29-rc1git #29 Process khvcd (pid: 748, task: 000000002fb9a480, ksp: 000000002f66bd78) 070000000000000a 000000002f66ba00 0000000000000002 (null) 000000002f66baa0 000000002f66ba18 000000002f66ba18 0000000000104f08 ffffffffffffc000 000000002f66bd78 (null) (null) 000000002f66ba00 000000000000000c 000000002f66ba00 000000002f66ba70 0000000000466af8 0000000000104f08 000000002f66ba00 000000002f66ba50 Call Trace: ([<0000000000104e7c>] show_trace+0x138/0x158) [<0000000000104f62>] show_stack+0xc6/0xf8 [<0000000000105740>] dump_stack+0xb0/0xc0 [<000000000013144a>] __might_sleep+0x14e/0x17c [<000000000045e226>] mutex_lock_nested+0x42/0x3b4 [<00000000002c443e>] echo_char_raw+0x3a/0x9c [<00000000002c688c>] n_tty_receive_buf+0x1154/0x1208 [<00000000002ca0a2>] flush_to_ldisc+0x152/0x220 [<00000000002ca1da>] tty_flip_buffer_push+0x6a/0x90 [<00000000002cea74>] hvc_poll+0x244/0x2c8 [<00000000002ceb68>] khvcd+0x70/0x12c [<000000000015bbd0>] kthread+0x68/0xa0 [<0000000000109d5a>] kernel_thread_starter+0x6/0xc [<0000000000109d54>] kernel_thread_starter+0x0/0xc 1 lock held by khvcd/748: #0: (hvc_structs_lock){--..}, at: [<00000000002ceb50>] khvcd+0x58/0x12c Signed-off-by: Hendrik Brueckner Acked-by: Christian Borntraeger Signed-off-by: Benjamin Herrenschmidt --- drivers/char/hvc_console.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/char/hvc_console.c b/drivers/char/hvc_console.c index 09676b4e5d8..94e7e3c8c05 100644 --- a/drivers/char/hvc_console.c +++ b/drivers/char/hvc_console.c @@ -318,8 +318,6 @@ static int hvc_open(struct tty_struct *tty, struct file * filp) } /* else count == 0 */ tty->driver_data = hp; - if (!hp->irq_requested) - tty->low_latency = 1; /* Makes flushes to ldisc synchronous. 
*/ hp->tty = tty; -- cgit From 0025e75357e7e8daf03a74294c98425a80a433c7 Mon Sep 17 00:00:00 2001 From: Matthias Fuchs Date: Thu, 15 Jan 2009 09:43:35 +0000 Subject: serial: Add 16850 uart type support to OF uart driver This patch adds support for "ns16850" as supported value of the compatible node in flat device tree uart descriptions. This is needed for example when you have a XR16C2850 uart connected to a PPC405's external bus controller. Signed-off-by: Matthias Fuchs Signed-off-by: Benjamin Herrenschmidt --- drivers/serial/of_serial.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/serial/of_serial.c b/drivers/serial/of_serial.c index a821e3a3d66..14f8fa9135b 100644 --- a/drivers/serial/of_serial.c +++ b/drivers/serial/of_serial.c @@ -163,6 +163,7 @@ static struct of_device_id __devinitdata of_platform_serial_table[] = { { .type = "serial", .compatible = "ns16450", .data = (void *)PORT_16450, }, { .type = "serial", .compatible = "ns16550", .data = (void *)PORT_16550, }, { .type = "serial", .compatible = "ns16750", .data = (void *)PORT_16750, }, + { .type = "serial", .compatible = "ns16850", .data = (void *)PORT_16850, }, #ifdef CONFIG_SERIAL_OF_PLATFORM_NWPSERIAL { .type = "serial", .compatible = "ibm,qpace-nwp-serial", .data = (void *)PORT_NWPSERIAL, }, -- cgit From 40a518d9f1fd8ed1061b8b4e2ce8a44794f4eb03 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Mon, 12 Jan 2009 12:05:32 -0800 Subject: drm: initial KMS config fixes When mode setting is first initialized, the driver will call into drm_helper_initial_config() to set up an initial output and framebuffer configuration. This routine is responsible for probing the available connectors, encoders, and crtcs, looking for modes and putting together something reasonable (where reasonable is defined as "allows kernel messages to be visible on as many displays as possible"). However, the code was a bit too aggressive in setting default modes when none were found on a given connector. 
Even if some connectors had modes, any connectors found lacking modes would have the default 800x600 mode added to their mode list, which in some cases could cause problems later down the line. In my case, the LVDS was perfectly available, but the initial config code added 800x600 modes to both of the detected but unavailable HDMI connectors (which are on my non-existent docking station). This ended up preventing later code from setting a mode on my LVDS, which is bad. This patch fixes that behavior by making the initial config code walk through the connectors first, counting the available modes, before it decides to add any default modes to a possibly connected output. It also fixes the logic in drm_target_preferred() that was causing zeroed out modes to be set as the preferred mode for a given connector, even if no modes were available. Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 127 ++++++++++++++++++++++++++++---------- include/drm/drm_crtc.h | 2 +- include/drm/drm_crtc_helper.h | 2 +- 3 files changed, 95 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index d8a982b7129..e490e69db21 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -36,7 +36,7 @@ /* * Detailed mode info for 800x600@60Hz */ -static struct drm_display_mode std_mode[] = { +static struct drm_display_mode std_modes[] = { { DRM_MODE("800x600", DRM_MODE_TYPE_DEFAULT, 40000, 800, 840, 968, 1056, 0, 600, 601, 605, 628, 0, DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, @@ -60,15 +60,18 @@ static struct drm_display_mode std_mode[] = { * changes have occurred. * * FIXME: take into account monitor limits + * + * RETURNS: + * Number of modes found on @connector. 
*/ -void drm_helper_probe_single_connector_modes(struct drm_connector *connector, - uint32_t maxX, uint32_t maxY) +int drm_helper_probe_single_connector_modes(struct drm_connector *connector, + uint32_t maxX, uint32_t maxY) { struct drm_device *dev = connector->dev; struct drm_display_mode *mode, *t; struct drm_connector_helper_funcs *connector_funcs = connector->helper_private; - int ret; + int count = 0; DRM_DEBUG("%s\n", drm_get_connector_name(connector)); /* set all modes to the unverified state */ @@ -81,14 +84,14 @@ void drm_helper_probe_single_connector_modes(struct drm_connector *connector, DRM_DEBUG("%s is disconnected\n", drm_get_connector_name(connector)); /* TODO set EDID to NULL */ - return; + return 0; } - ret = (*connector_funcs->get_modes)(connector); + count = (*connector_funcs->get_modes)(connector); + if (!count) + return 0; - if (ret) { - drm_mode_connector_list_update(connector); - } + drm_mode_connector_list_update(connector); if (maxX && maxY) drm_mode_validate_size(dev, &connector->modes, maxX, @@ -102,25 +105,8 @@ void drm_helper_probe_single_connector_modes(struct drm_connector *connector, drm_mode_prune_invalid(dev, &connector->modes, true); - if (list_empty(&connector->modes)) { - struct drm_display_mode *stdmode; - - DRM_DEBUG("No valid modes on %s\n", - drm_get_connector_name(connector)); - - /* Should we do this here ??? - * When no valid EDID modes are available we end up - * here and bailed in the past, now we add a standard - * 640x480@60Hz mode and carry on. 
- */ - stdmode = drm_mode_duplicate(dev, &std_mode[0]); - drm_mode_probed_add(connector, stdmode); - drm_mode_list_concat(&connector->probed_modes, - &connector->modes); - - DRM_DEBUG("Adding standard 640x480 @ 60Hz to %s\n", - drm_get_connector_name(connector)); - } + if (list_empty(&connector->modes)) + return 0; drm_mode_sort(&connector->modes); @@ -131,20 +117,58 @@ void drm_helper_probe_single_connector_modes(struct drm_connector *connector, drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); drm_mode_debug_printmodeline(mode); } + + return count; } EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); -void drm_helper_probe_connector_modes(struct drm_device *dev, uint32_t maxX, +int drm_helper_probe_connector_modes(struct drm_device *dev, uint32_t maxX, uint32_t maxY) { struct drm_connector *connector; + int count = 0; list_for_each_entry(connector, &dev->mode_config.connector_list, head) { - drm_helper_probe_single_connector_modes(connector, maxX, maxY); + count += drm_helper_probe_single_connector_modes(connector, + maxX, maxY); } + + return count; } EXPORT_SYMBOL(drm_helper_probe_connector_modes); +static void drm_helper_add_std_modes(struct drm_device *dev, + struct drm_connector *connector) +{ + struct drm_display_mode *mode, *t; + int i; + + for (i = 0; i < ARRAY_SIZE(std_modes); i++) { + struct drm_display_mode *stdmode; + + /* + * When no valid EDID modes are available we end up + * here and bailed in the past, now we add some standard + * modes and move on. 
+ */ + stdmode = drm_mode_duplicate(dev, &std_modes[i]); + drm_mode_probed_add(connector, stdmode); + drm_mode_list_concat(&connector->probed_modes, + &connector->modes); + + DRM_DEBUG("Adding mode %s to %s\n", stdmode->name, + drm_get_connector_name(connector)); + } + drm_mode_sort(&connector->modes); + + DRM_DEBUG("Added std modes on %s\n", drm_get_connector_name(connector)); + list_for_each_entry_safe(mode, t, &connector->modes, head) { + mode->vrefresh = drm_mode_vrefresh(mode); + + drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V); + drm_mode_debug_printmodeline(mode); + } +} /** * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config @@ -237,6 +261,8 @@ static void drm_enable_connectors(struct drm_device *dev, bool *enabled) list_for_each_entry(connector, &dev->mode_config.connector_list, head) { enabled[i] = drm_connector_enabled(connector, true); + DRM_DEBUG("connector %d enabled? %s\n", connector->base.id, + enabled[i] ? "yes" : "no"); any_enabled |= enabled[i]; i++; } @@ -265,11 +291,17 @@ static bool drm_target_preferred(struct drm_device *dev, continue; } + DRM_DEBUG("looking for preferred mode on connector %d\n", + connector->base.id); + modes[i] = drm_has_preferred_mode(connector, width, height); - if (!modes[i]) { + /* No preferred modes, pick one off the list */ + if (!modes[i] && !list_empty(&connector->modes)) { list_for_each_entry(modes[i], &connector->modes, head) break; } + DRM_DEBUG("found mode %s\n", modes[i] ? 
modes[i]->name : + "none"); i++; } return true; @@ -369,6 +401,8 @@ static void drm_setup_crtcs(struct drm_device *dev) int width, height; int i, ret; + DRM_DEBUG("\n"); + width = dev->mode_config.max_width; height = dev->mode_config.max_height; @@ -390,6 +424,8 @@ static void drm_setup_crtcs(struct drm_device *dev) if (!ret) DRM_ERROR("Unable to find initial modes\n"); + DRM_DEBUG("picking CRTCs for %dx%d config\n", width, height); + drm_pick_crtcs(dev, crtcs, modes, 0, width, height); i = 0; @@ -403,6 +439,8 @@ static void drm_setup_crtcs(struct drm_device *dev) } if (mode && crtc) { + DRM_DEBUG("desired mode %s set on crtc %d\n", + mode->name, crtc->base.id); crtc->desired_mode = mode; connector->encoder->crtc = crtc; } else @@ -764,10 +802,31 @@ bool drm_helper_plugged_event(struct drm_device *dev) */ bool drm_helper_initial_config(struct drm_device *dev, bool can_grow) { - int ret = false; + struct drm_connector *connector; + int count = 0; - drm_helper_plugged_event(dev); - return ret; + count = drm_helper_probe_connector_modes(dev, + dev->mode_config.max_width, + dev->mode_config.max_height); + + /* + * None of the available connectors had any modes, so add some + * and try to light them up anyway + */ + if (!count) { + DRM_ERROR("connectors have no modes, using standard modes\n"); + list_for_each_entry(connector, + &dev->mode_config.connector_list, + head) + drm_helper_add_std_modes(dev, connector); + } + + drm_setup_crtcs(dev); + + /* alert the driver fb layer */ + dev->mode_config.funcs->fb_changed(dev); + + return 0; } EXPORT_SYMBOL(drm_helper_initial_config); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 0acb07f31fa..47809ac94bc 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -395,7 +395,7 @@ struct drm_connector_funcs { void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); enum drm_connector_status (*detect)(struct drm_connector *connector); - void (*fill_modes)(struct 
drm_connector *connector, uint32_t max_width, uint32_t max_height); + int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); void (*destroy)(struct drm_connector *connector); diff --git a/include/drm/drm_crtc_helper.h b/include/drm/drm_crtc_helper.h index 4bc04cf460a..0c6f0e11b41 100644 --- a/include/drm/drm_crtc_helper.h +++ b/include/drm/drm_crtc_helper.h @@ -88,7 +88,7 @@ struct drm_connector_helper_funcs { struct drm_encoder *(*best_encoder)(struct drm_connector *connector); }; -extern void drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY); +extern int drm_helper_probe_single_connector_modes(struct drm_connector *connector, uint32_t maxX, uint32_t maxY); extern void drm_helper_disable_unused_functions(struct drm_device *dev); extern int drm_helper_hotplug_stage_two(struct drm_device *dev); extern bool drm_helper_initial_config(struct drm_device *dev, bool can_grow); -- cgit From 712531bfe95be42a672ebab51b55580e7d92c464 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Fri, 9 Jan 2009 13:56:14 -0800 Subject: drm: handle depth & bpp changes correctly Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 48 +++++++++++++++++++++++++----------- drivers/gpu/drm/i915/intel_display.c | 2 ++ 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index e490e69db21..964c5eb1fad 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -480,6 +480,7 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, int saved_x, saved_y; struct drm_encoder *encoder; bool ret = true; + bool depth_changed, bpp_changed; adjusted_mode = drm_mode_duplicate(dev, mode); @@ -488,6 +489,15 @@ bool drm_crtc_helper_set_mode(struct 
drm_crtc *crtc, if (!crtc->enabled) return true; + if (old_fb && crtc->fb) { + depth_changed = (old_fb->depth != crtc->fb->depth); + bpp_changed = (old_fb->bits_per_pixel != + crtc->fb->bits_per_pixel); + } else { + depth_changed = true; + bpp_changed = true; + } + saved_mode = crtc->mode; saved_x = crtc->x; saved_y = crtc->y; @@ -500,7 +510,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc, crtc->y = y; if (drm_mode_equal(&saved_mode, &crtc->mode)) { - if (saved_x != crtc->x || saved_y != crtc->y) { + if (saved_x != crtc->x || saved_y != crtc->y || + depth_changed || bpp_changed) { crtc_funcs->mode_set_base(crtc, crtc->x, crtc->y, old_fb); goto done; @@ -606,8 +617,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) struct drm_encoder **save_encoders, *new_encoder; struct drm_framebuffer *old_fb; bool save_enabled; - bool changed = false; - bool flip_or_move = false; + bool mode_changed = false; + bool fb_changed = false; struct drm_connector *connector; int count = 0, ro, fail = 0; struct drm_crtc_helper_funcs *crtc_funcs; @@ -635,7 +646,10 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) /* save previous config */ save_enabled = set->crtc->enabled; - /* this is meant to be num_connector not num_crtc */ + /* + * We do mode_config.num_connectors here since we'll look at the + * CRTC and encoder associated with each connector later. 
+ */ save_crtcs = kzalloc(dev->mode_config.num_connector * sizeof(struct drm_crtc *), GFP_KERNEL); if (!save_crtcs) @@ -651,21 +665,25 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) /* We should be able to check here if the fb has the same properties * and then just flip_or_move it */ if (set->crtc->fb != set->fb) { - /* if we have no fb then its a change not a flip */ + /* If we have no fb then treat it as a full mode set */ if (set->crtc->fb == NULL) - changed = true; + mode_changed = true; + else if ((set->fb->bits_per_pixel != + set->crtc->fb->bits_per_pixel) || + set->fb->depth != set->crtc->fb->depth) + fb_changed = true; else - flip_or_move = true; + fb_changed = true; } if (set->x != set->crtc->x || set->y != set->crtc->y) - flip_or_move = true; + fb_changed = true; if (set->mode && !drm_mode_equal(set->mode, &set->crtc->mode)) { DRM_DEBUG("modes are different\n"); drm_mode_debug_printmodeline(&set->crtc->mode); drm_mode_debug_printmodeline(set->mode); - changed = true; + mode_changed = true; } /* a) traverse passed in connector list and get encoders for them */ @@ -688,7 +706,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) } if (new_encoder != connector->encoder) { - changed = true; + mode_changed = true; connector->encoder = new_encoder; } } @@ -715,16 +733,16 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) new_crtc = set->crtc; } if (new_crtc != connector->encoder->crtc) { - changed = true; + mode_changed = true; connector->encoder->crtc = new_crtc; } } /* mode_set_base is not a required function */ - if (flip_or_move && !crtc_funcs->mode_set_base) - changed = true; + if (fb_changed && !crtc_funcs->mode_set_base) + mode_changed = true; - if (changed) { + if (mode_changed) { old_fb = set->crtc->fb; set->crtc->fb = set->fb; set->crtc->enabled = (set->mode != NULL); @@ -743,7 +761,7 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set) set->crtc->desired_mode = set->mode; } 
drm_helper_disable_unused_functions(dev); - } else if (flip_or_move) { + } else if (fb_changed) { old_fb = set->crtc->fb; if (set->crtc->fb != set->fb) set->crtc->fb = set->fb; diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8ccb9c3ab86..4372acff5a0 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -401,6 +401,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, I915_WRITE(dspstride, crtc->fb->pitch); dspcntr = I915_READ(dspcntr_reg); + /* Mask out pixel format bits in case we change it */ + dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; switch (crtc->fb->bits_per_pixel) { case 8: dspcntr |= DISPPLANE_8BPP; -- cgit From e285f3cd2c376d2336f9a383241a98266363c7d4 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Wed, 14 Jan 2009 10:53:36 -0800 Subject: drm/i915: make LVDS fixed mode a preferred mode The detected fixed panel mode really is preferred, so mark it as such and add it to the LVDS connector mode list. 
Signed-off-by: Jesse Barnes Signed-off-by: Eric Anholt Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_lvds.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index ccecfaf6307..2fafdcc108f 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -456,6 +456,13 @@ void intel_lvds_init(struct drm_device *dev) dev_priv->panel_fixed_mode = drm_mode_duplicate(dev, dev_priv->vbt_mode); mutex_unlock(&dev->mode_config.mutex); + if (dev_priv->panel_fixed_mode) { + dev_priv->panel_fixed_mode->type |= + DRM_MODE_TYPE_PREFERRED; + drm_mode_probed_add(connector, + dev_priv->panel_fixed_mode); + goto out; + } } /* -- cgit From 71acb5eb8d95b371f4cdd88a47f3c83c870d1c8f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Tue, 30 Dec 2008 20:31:46 +1000 Subject: drm/i915: add support for physical memory objects This is an initial patch to add support for objects which need physically contiguous main RAM, e.g. cursors and overlay registers on older chipsets. These objects are bound on cursor bin, like pinning, and we copy the data to/from the backing store object into the real one on attach/detach. notes: possibly over the top in attach/detach operations. no overlay support yet.
Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/i915_dma.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 23 +++++ drivers/gpu/drm/i915/i915_gem.c | 189 ++++++++++++++++++++++++++++++++++- drivers/gpu/drm/i915/intel_display.c | 32 ++++-- 4 files changed, 233 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 868f574363a..bbadf1c0414 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1160,6 +1160,8 @@ int i915_driver_unload(struct drm_device *dev) if (drm_core_check_feature(dev, DRIVER_MODESET)) { intel_modeset_cleanup(dev); + i915_gem_free_all_phys_object(dev); + mutex_lock(&dev->struct_mutex); i915_gem_cleanup_ringbuffer(dev); mutex_unlock(&dev->struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 563de18063f..e1351825200 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -72,6 +72,18 @@ enum pipe { #define WATCH_INACTIVE 0 #define WATCH_PWRITE 0 +#define I915_GEM_PHYS_CURSOR_0 1 +#define I915_GEM_PHYS_CURSOR_1 2 +#define I915_GEM_PHYS_OVERLAY_REGS 3 +#define I915_MAX_PHYS_OBJECT (I915_GEM_PHYS_OVERLAY_REGS) + +struct drm_i915_gem_phys_object { + int id; + struct page **page_list; + drm_dma_handle_t *handle; + struct drm_gem_object *cur_obj; +}; + typedef struct _drm_i915_ring_buffer { int tail_mask; unsigned long Size; @@ -358,6 +370,9 @@ typedef struct drm_i915_private { uint32_t bit_6_swizzle_x; /** Bit 6 swizzling required for Y tiling */ uint32_t bit_6_swizzle_y; + + /* storage for physical objects */ + struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT]; } mm; } drm_i915_private_t; @@ -436,6 +451,9 @@ struct drm_i915_gem_object { /** User space pin count and filp owning the pin */ uint32_t user_pin_count; struct drm_file *pin_filp; + + /** for phy allocated objects */ + struct drm_i915_gem_phys_object *phys_obj; }; /** @@ -598,6 +616,11 @@ int i915_gem_do_init(struct 
drm_device *dev, unsigned long start, int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write); +int i915_gem_attach_phys_object(struct drm_device *dev, + struct drm_gem_object *obj, int id); +void i915_gem_detach_phys_object(struct drm_device *dev, + struct drm_gem_object *obj); +void i915_gem_free_all_phys_object(struct drm_device *dev); /* i915_gem_tiling.c */ void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1384d668655..96316fd4723 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -55,6 +55,9 @@ static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, static void i915_gem_object_get_fence_reg(struct drm_gem_object *obj); static void i915_gem_clear_fence_reg(struct drm_gem_object *obj); static int i915_gem_evict_something(struct drm_device *dev); +static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv); int i915_gem_do_init(struct drm_device *dev, unsigned long start, unsigned long end) @@ -386,8 +389,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * pread/pwrite currently are reading and writing from the CPU * perspective, requiring manual detiling by the client. 
*/ - if (obj_priv->tiling_mode == I915_TILING_NONE && - dev->gtt_total != 0) + if (obj_priv->phys_obj) + ret = i915_gem_phys_pwrite(dev, obj, args, file_priv); + else if (obj_priv->tiling_mode == I915_TILING_NONE && + dev->gtt_total != 0) ret = i915_gem_gtt_pwrite(dev, obj, args, file_priv); else ret = i915_gem_shmem_pwrite(dev, obj, args, file_priv); @@ -2858,6 +2863,9 @@ void i915_gem_free_object(struct drm_gem_object *obj) while (obj_priv->pin_count > 0) i915_gem_object_unpin(obj); + if (obj_priv->phys_obj) + i915_gem_detach_phys_object(dev, obj); + i915_gem_object_unbind(obj); list = &obj->map_list; @@ -3293,3 +3301,180 @@ i915_gem_load(struct drm_device *dev) i915_gem_detect_bit_6_swizzle(dev); } + +/* + * Create a physically contiguous memory object for this object + * e.g. for cursor + overlay regs + */ +int i915_gem_init_phys_object(struct drm_device *dev, + int id, int size) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_phys_object *phys_obj; + int ret; + + if (dev_priv->mm.phys_objs[id - 1] || !size) + return 0; + + phys_obj = drm_calloc(1, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER); + if (!phys_obj) + return -ENOMEM; + + phys_obj->id = id; + + phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff); + if (!phys_obj->handle) { + ret = -ENOMEM; + goto kfree_obj; + } +#ifdef CONFIG_X86 + set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); +#endif + + dev_priv->mm.phys_objs[id - 1] = phys_obj; + + return 0; +kfree_obj: + drm_free(phys_obj, sizeof(struct drm_i915_gem_phys_object), DRM_MEM_DRIVER); + return ret; +} + +void i915_gem_free_phys_object(struct drm_device *dev, int id) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_phys_object *phys_obj; + + if (!dev_priv->mm.phys_objs[id - 1]) + return; + + phys_obj = dev_priv->mm.phys_objs[id - 1]; + if (phys_obj->cur_obj) { + i915_gem_detach_phys_object(dev, phys_obj->cur_obj); + } + +#ifdef 
CONFIG_X86 + set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE); +#endif + drm_pci_free(dev, phys_obj->handle); + kfree(phys_obj); + dev_priv->mm.phys_objs[id - 1] = NULL; +} + +void i915_gem_free_all_phys_object(struct drm_device *dev) +{ + int i; + + for (i = 0; i < I915_MAX_PHYS_OBJECT; i++) + i915_gem_free_phys_object(dev, i); +} + +void i915_gem_detach_phys_object(struct drm_device *dev, + struct drm_gem_object *obj) +{ + struct drm_i915_gem_object *obj_priv; + int i; + int ret; + int page_count; + + obj_priv = obj->driver_private; + if (!obj_priv->phys_obj) + return; + + ret = i915_gem_object_get_page_list(obj); + if (ret) + goto out; + + page_count = obj->size / PAGE_SIZE; + + for (i = 0; i < page_count; i++) { + char *dst = kmap_atomic(obj_priv->page_list[i], KM_USER0); + char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); + + memcpy(dst, src, PAGE_SIZE); + kunmap_atomic(dst, KM_USER0); + } + drm_clflush_pages(obj_priv->page_list, page_count); + drm_agp_chipset_flush(dev); +out: + obj_priv->phys_obj->cur_obj = NULL; + obj_priv->phys_obj = NULL; +} + +int +i915_gem_attach_phys_object(struct drm_device *dev, + struct drm_gem_object *obj, int id) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; + int ret = 0; + int page_count; + int i; + + if (id > I915_MAX_PHYS_OBJECT) + return -EINVAL; + + obj_priv = obj->driver_private; + + if (obj_priv->phys_obj) { + if (obj_priv->phys_obj->id == id) + return 0; + i915_gem_detach_phys_object(dev, obj); + } + + + /* create a new object */ + if (!dev_priv->mm.phys_objs[id - 1]) { + ret = i915_gem_init_phys_object(dev, id, + obj->size); + if (ret) { + DRM_ERROR("failed to init phys object %d size: %d\n", id, obj->size); + goto out; + } + } + + /* bind to the object */ + obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1]; + obj_priv->phys_obj->cur_obj = obj; + + ret = i915_gem_object_get_page_list(obj); + if (ret) { + 
DRM_ERROR("failed to get page list\n"); + goto out; + } + + page_count = obj->size / PAGE_SIZE; + + for (i = 0; i < page_count; i++) { + char *src = kmap_atomic(obj_priv->page_list[i], KM_USER0); + char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE); + + memcpy(dst, src, PAGE_SIZE); + kunmap_atomic(src, KM_USER0); + } + + return 0; +out: + return ret; +} + +static int +i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj, + struct drm_i915_gem_pwrite *args, + struct drm_file *file_priv) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + void *obj_addr; + int ret; + char __user *user_data; + + user_data = (char __user *) (uintptr_t) args->data_ptr; + obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset; + + DRM_ERROR("obj_addr %p, %lld\n", obj_addr, args->size); + ret = copy_from_user(obj_addr, user_data, args->size); + if (ret) + return -EFAULT; + + drm_agp_chipset_flush(dev); + return 0; +} diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 4372acff5a0..114a7a1a874 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1020,17 +1020,23 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, return -ENOMEM; } - if (dev_priv->cursor_needs_physical) { - addr = dev->agp->base + obj_priv->gtt_offset; - } else { + /* we only need to pin inside GTT if cursor is non-phy */ + if (!dev_priv->cursor_needs_physical) { + ret = i915_gem_object_pin(bo, PAGE_SIZE); + if (ret) { + DRM_ERROR("failed to pin cursor bo\n"); + drm_gem_object_unreference(bo); + return ret; + } addr = obj_priv->gtt_offset; - } - - ret = i915_gem_object_pin(bo, PAGE_SIZE); - if (ret) { - DRM_ERROR("failed to pin cursor bo\n"); - drm_gem_object_unreference(bo); - return ret; + } else { + ret = i915_gem_attach_phys_object(dev, bo, (pipe == 0) ? 
I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1); + if (ret) { + DRM_ERROR("failed to attach phys object\n"); + drm_gem_object_unreference(bo); + return ret; + } + addr = obj_priv->phys_obj->handle->busaddr; } temp = 0; @@ -1043,7 +1049,11 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, I915_WRITE(base, addr); if (intel_crtc->cursor_bo) { - i915_gem_object_unpin(intel_crtc->cursor_bo); + if (dev_priv->cursor_needs_physical) { + if (intel_crtc->cursor_bo != bo) + i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); + } else + i915_gem_object_unpin(intel_crtc->cursor_bo); drm_gem_object_unreference(intel_crtc->cursor_bo); } -- cgit From 34b8686e12eaf9878aaab89e92222060c3e7cc48 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 15 Jan 2009 14:03:07 +1000 Subject: drm/i915: lock correct mutex around object unreference. This makes sure the mutex is held around the unreference. Signed-off-by: Dave Airlie --- drivers/gpu/drm/i915/intel_display.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 114a7a1a874..31c3732b7a6 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1016,8 +1016,8 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, if (bo->size < width * height * 4) { DRM_ERROR("buffer is to small\n"); - drm_gem_object_unreference(bo); - return -ENOMEM; + ret = -ENOMEM; + goto fail; } /* we only need to pin inside GTT if cursor is non-phy */ @@ -1025,16 +1025,14 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, ret = i915_gem_object_pin(bo, PAGE_SIZE); if (ret) { DRM_ERROR("failed to pin cursor bo\n"); - drm_gem_object_unreference(bo); - return ret; + goto fail; } addr = obj_priv->gtt_offset; } else { ret = i915_gem_attach_phys_object(dev, bo, (pipe == 0) ? 
I915_GEM_PHYS_CURSOR_0 : I915_GEM_PHYS_CURSOR_1); if (ret) { DRM_ERROR("failed to attach phys object\n"); - drm_gem_object_unreference(bo); - return ret; + goto fail; } addr = obj_priv->phys_obj->handle->busaddr; } @@ -1054,13 +1052,20 @@ static int intel_crtc_cursor_set(struct drm_crtc *crtc, i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); } else i915_gem_object_unpin(intel_crtc->cursor_bo); + mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(intel_crtc->cursor_bo); + mutex_unlock(&dev->struct_mutex); } intel_crtc->cursor_addr = addr; intel_crtc->cursor_bo = bo; return 0; +fail: + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(bo); + mutex_unlock(&dev->struct_mutex); + return ret; } static int intel_crtc_cursor_move(struct drm_crtc *crtc, int x, int y) -- cgit From 94be9a58d7e683ac3c1df1858a17f09ebade8da0 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Fri, 16 Jan 2009 10:17:09 -0500 Subject: [libata] get-identity ioctl: Fix use of invalid memory pointer for SAS drivers. Caught by Ke Wei (and team?) at Marvell. Also, move the ata_scsi_ioctl export to libata-scsi.c, as that seems to be the general trend. 
Acked-by: James Bottomley Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 1 - drivers/ata/libata-scsi.c | 17 +++++++++++++---- drivers/scsi/ipr.c | 2 +- drivers/scsi/libsas/sas_scsi_host.c | 2 +- include/linux/libata.h | 2 ++ 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 71218d76d75..552ecae1343 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6638,7 +6638,6 @@ EXPORT_SYMBOL_GPL(ata_dev_pair); EXPORT_SYMBOL_GPL(ata_port_disable); EXPORT_SYMBOL_GPL(ata_ratelimit); EXPORT_SYMBOL_GPL(ata_wait_register); -EXPORT_SYMBOL_GPL(ata_scsi_ioctl); EXPORT_SYMBOL_GPL(ata_scsi_queuecmd); EXPORT_SYMBOL_GPL(ata_scsi_slave_config); EXPORT_SYMBOL_GPL(ata_scsi_slave_destroy); diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index 9e92107691f..a1a6e6298c3 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -423,9 +423,9 @@ int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, * RETURNS: * Zero on success, negative errno on error. 
*/ -static int ata_get_identity(struct scsi_device *sdev, void __user *arg) +static int ata_get_identity(struct ata_port *ap, struct scsi_device *sdev, + void __user *arg) { - struct ata_port *ap = ata_shost_to_port(sdev->host); struct ata_device *dev = ata_scsi_find_dev(ap, sdev); u16 __user *dst = arg; char buf[40]; @@ -645,7 +645,8 @@ int ata_task_ioctl(struct scsi_device *scsidev, void __user *arg) return rc; } -int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) +int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *scsidev, + int cmd, void __user *arg) { int val = -EINVAL, rc = -EINVAL; @@ -663,7 +664,7 @@ int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) return 0; case HDIO_GET_IDENTITY: - return ata_get_identity(scsidev, arg); + return ata_get_identity(ap, scsidev, arg); case HDIO_DRIVE_CMD: if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) @@ -682,6 +683,14 @@ int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) return rc; } +EXPORT_SYMBOL_GPL(ata_sas_scsi_ioctl); + +int ata_scsi_ioctl(struct scsi_device *scsidev, int cmd, void __user *arg) +{ + return ata_sas_scsi_ioctl(ata_shost_to_port(scsidev->host), + scsidev, cmd, arg); +} +EXPORT_SYMBOL_GPL(ata_scsi_ioctl); /** * ata_scsi_qc_new - acquire new ata_queued_cmd reference diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index 841f460edbc..07829009a8b 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -4912,7 +4912,7 @@ static int ipr_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) if (res && ipr_is_gata(res)) { if (cmd == HDIO_GET_IDENTITY) return -ENOTTY; - return ata_scsi_ioctl(sdev, cmd, arg); + return ata_sas_scsi_ioctl(res->sata_port->ap, sdev, cmd, arg); } return -EINVAL; diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index 744838780ad..1c558d3bce1 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -717,7 
+717,7 @@ int sas_ioctl(struct scsi_device *sdev, int cmd, void __user *arg) struct domain_device *dev = sdev_to_domain_dev(sdev); if (dev_is_sata(dev)) - return ata_scsi_ioctl(sdev, cmd, arg); + return ata_sas_scsi_ioctl(dev->sata_dev.ap, sdev, cmd, arg); return -EINVAL; } diff --git a/include/linux/libata.h b/include/linux/libata.h index b6b8a7f3ec6..73b69c7071c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -927,6 +927,8 @@ extern void ata_host_init(struct ata_host *, struct device *, extern int ata_scsi_detect(struct scsi_host_template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); +extern int ata_sas_scsi_ioctl(struct ata_port *ap, struct scsi_device *dev, + int cmd, void __user *arg); extern void ata_sas_port_destroy(struct ata_port *); extern struct ata_port *ata_sas_port_alloc(struct ata_host *, struct ata_port_info *, struct Scsi_Host *); -- cgit From 16028232bf6dc5ed7e4bd341ef335f386229ad0b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 16 Jan 2009 15:27:27 +0900 Subject: pata_atiixp: update port enabledness test handling Port enabledness test fits much better into init_one() instead of pre_reset(). The reason why these tests are in pre_reset() is purely historical at this point. Move it to init_one(). This will help further changes. 
Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/ata/pata_atiixp.c | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c index 0e2cde8f997..506adde8ebb 100644 --- a/drivers/ata/pata_atiixp.c +++ b/drivers/ata/pata_atiixp.c @@ -32,21 +32,6 @@ enum { ATIIXP_IDE_UDMA_MODE = 0x56 }; -static int atiixp_pre_reset(struct ata_link *link, unsigned long deadline) -{ - struct ata_port *ap = link->ap; - static const struct pci_bits atiixp_enable_bits[] = { - { 0x48, 1, 0x01, 0x00 }, - { 0x48, 1, 0x08, 0x00 } - }; - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - - if (!pci_test_config_bits(pdev, &atiixp_enable_bits[ap->port_no])) - return -ENOENT; - - return ata_sff_prereset(link, deadline); -} - static int atiixp_cable_detect(struct ata_port *ap) { struct pci_dev *pdev = to_pci_dev(ap->host->dev); @@ -229,10 +214,9 @@ static struct ata_port_operations atiixp_port_ops = { .cable_detect = atiixp_cable_detect, .set_piomode = atiixp_set_piomode, .set_dmamode = atiixp_set_dmamode, - .prereset = atiixp_pre_reset, }; -static int atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id) +static int atiixp_init_one(struct pci_dev *pdev, const struct pci_device_id *id) { static const struct ata_port_info info = { .flags = ATA_FLAG_SLAVE_POSS, @@ -241,8 +225,18 @@ static int atiixp_init_one(struct pci_dev *dev, const struct pci_device_id *id) .udma_mask = 0x3F, .port_ops = &atiixp_port_ops }; - const struct ata_port_info *ppi[] = { &info, NULL }; - return ata_pci_sff_init_one(dev, ppi, &atiixp_sht, NULL); + static const struct pci_bits atiixp_enable_bits[] = { + { 0x48, 1, 0x01, 0x00 }, + { 0x48, 1, 0x08, 0x00 } + }; + const struct ata_port_info *ppi[] = { &info, &info }; + int i; + + for (i = 0; i < 2; i++) + if (!pci_test_config_bits(pdev, &atiixp_enable_bits[i])) + ppi[i] = &ata_dummy_port_info; + + return ata_pci_sff_init_one(pdev, ppi, &atiixp_sht, 
NULL); } static const struct pci_device_id atiixp[] = { -- cgit From 6813952021a7820a505002de260bda36978671f7 Mon Sep 17 00:00:00 2001 From: "JosephChan@via.com.tw" Date: Fri, 16 Jan 2009 19:44:55 +0800 Subject: sata_via: Add VT8261 support Signed-off-by: Joseph Chan Signed-off-by: Jeff Garzik --- drivers/ata/sata_via.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ata/sata_via.c b/drivers/ata/sata_via.c index c18935f0bda..5c62da9cd49 100644 --- a/drivers/ata/sata_via.c +++ b/drivers/ata/sata_via.c @@ -92,6 +92,8 @@ static const struct pci_device_id svia_pci_tbl[] = { { PCI_VDEVICE(VIA, 0x5372), vt6420 }, { PCI_VDEVICE(VIA, 0x7372), vt6420 }, { PCI_VDEVICE(VIA, 0x5287), vt8251 }, /* 2 sata chnls (Master/Slave) */ + { PCI_VDEVICE(VIA, 0x9000), vt8251 }, + { PCI_VDEVICE(VIA, 0x9040), vt8251 }, { } /* terminate list */ }; -- cgit From 3ada9c126499dd4700dcdbd5b9fe8110ad17f578 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 15 Jan 2009 17:45:31 -0800 Subject: libata: Add another column to the ata_timing table. The forthcoming OCTEON SOC Compact Flash driver needs an additional timing value that was not available in the ata_timing table. I add a new column for dmack_hold time. The values were obtained from the Compact Flash specification Rev 4.1. 
Signed-off-by: David Daney Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 72 ++++++++++++++++++++++++----------------------- include/linux/libata.h | 9 ++++-- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 552ecae1343..88c242856da 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3029,33 +3029,33 @@ int sata_set_spd(struct ata_link *link) */ static const struct ata_timing ata_timing[] = { -/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 960, 0 }, */ - { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 600, 0 }, - { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 383, 0 }, - { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 240, 0 }, - { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 180, 0 }, - { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 120, 0 }, - { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 100, 0 }, - { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 80, 0 }, - - { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 960, 0 }, - { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 480, 0 }, - { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 240, 0 }, - - { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 480, 0 }, - { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 150, 0 }, - { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 120, 0 }, - { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 100, 0 }, - { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 80, 0 }, - -/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 150 }, */ - { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 120 }, - { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 80 }, - { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 60 }, - { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 45 }, - { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 30 }, - { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 20 }, - { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 15 }, +/* { XFER_PIO_SLOW, 120, 290, 240, 960, 290, 240, 0, 960, 0 }, */ + { XFER_PIO_0, 70, 290, 240, 600, 165, 150, 0, 600, 0 }, + { XFER_PIO_1, 50, 290, 93, 383, 125, 100, 0, 383, 0 }, + { XFER_PIO_2, 30, 290, 40, 330, 100, 90, 0, 240, 0 }, + { XFER_PIO_3, 30, 80, 70, 180, 80, 70, 0, 180, 0 
}, + { XFER_PIO_4, 25, 70, 25, 120, 70, 25, 0, 120, 0 }, + { XFER_PIO_5, 15, 65, 25, 100, 65, 25, 0, 100, 0 }, + { XFER_PIO_6, 10, 55, 20, 80, 55, 20, 0, 80, 0 }, + + { XFER_SW_DMA_0, 120, 0, 0, 0, 480, 480, 50, 960, 0 }, + { XFER_SW_DMA_1, 90, 0, 0, 0, 240, 240, 30, 480, 0 }, + { XFER_SW_DMA_2, 60, 0, 0, 0, 120, 120, 20, 240, 0 }, + + { XFER_MW_DMA_0, 60, 0, 0, 0, 215, 215, 20, 480, 0 }, + { XFER_MW_DMA_1, 45, 0, 0, 0, 80, 50, 5, 150, 0 }, + { XFER_MW_DMA_2, 25, 0, 0, 0, 70, 25, 5, 120, 0 }, + { XFER_MW_DMA_3, 25, 0, 0, 0, 65, 25, 5, 100, 0 }, + { XFER_MW_DMA_4, 25, 0, 0, 0, 55, 20, 5, 80, 0 }, + +/* { XFER_UDMA_SLOW, 0, 0, 0, 0, 0, 0, 0, 0, 150 }, */ + { XFER_UDMA_0, 0, 0, 0, 0, 0, 0, 0, 0, 120 }, + { XFER_UDMA_1, 0, 0, 0, 0, 0, 0, 0, 0, 80 }, + { XFER_UDMA_2, 0, 0, 0, 0, 0, 0, 0, 0, 60 }, + { XFER_UDMA_3, 0, 0, 0, 0, 0, 0, 0, 0, 45 }, + { XFER_UDMA_4, 0, 0, 0, 0, 0, 0, 0, 0, 30 }, + { XFER_UDMA_5, 0, 0, 0, 0, 0, 0, 0, 0, 20 }, + { XFER_UDMA_6, 0, 0, 0, 0, 0, 0, 0, 0, 15 }, { 0xFF } }; @@ -3065,14 +3065,15 @@ static const struct ata_timing ata_timing[] = { static void ata_timing_quantize(const struct ata_timing *t, struct ata_timing *q, int T, int UT) { - q->setup = EZ(t->setup * 1000, T); - q->act8b = EZ(t->act8b * 1000, T); - q->rec8b = EZ(t->rec8b * 1000, T); - q->cyc8b = EZ(t->cyc8b * 1000, T); - q->active = EZ(t->active * 1000, T); - q->recover = EZ(t->recover * 1000, T); - q->cycle = EZ(t->cycle * 1000, T); - q->udma = EZ(t->udma * 1000, UT); + q->setup = EZ(t->setup * 1000, T); + q->act8b = EZ(t->act8b * 1000, T); + q->rec8b = EZ(t->rec8b * 1000, T); + q->cyc8b = EZ(t->cyc8b * 1000, T); + q->active = EZ(t->active * 1000, T); + q->recover = EZ(t->recover * 1000, T); + q->dmack_hold = EZ(t->dmack_hold * 1000, T); + q->cycle = EZ(t->cycle * 1000, T); + q->udma = EZ(t->udma * 1000, UT); } void ata_timing_merge(const struct ata_timing *a, const struct ata_timing *b, @@ -3084,6 +3085,7 @@ void ata_timing_merge(const struct ata_timing *a, const struct ata_timing 
*b, if (what & ATA_TIMING_CYC8B ) m->cyc8b = max(a->cyc8b, b->cyc8b); if (what & ATA_TIMING_ACTIVE ) m->active = max(a->active, b->active); if (what & ATA_TIMING_RECOVER) m->recover = max(a->recover, b->recover); + if (what & ATA_TIMING_DMACK_HOLD) m->dmack_hold = max(a->dmack_hold, b->dmack_hold); if (what & ATA_TIMING_CYCLE ) m->cycle = max(a->cycle, b->cycle); if (what & ATA_TIMING_UDMA ) m->udma = max(a->udma, b->udma); } diff --git a/include/linux/libata.h b/include/linux/libata.h index 73b69c7071c..2c6bd66209f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -401,12 +401,14 @@ enum { ATA_TIMING_CYC8B, ATA_TIMING_ACTIVE = (1 << 4), ATA_TIMING_RECOVER = (1 << 5), - ATA_TIMING_CYCLE = (1 << 6), - ATA_TIMING_UDMA = (1 << 7), + ATA_TIMING_DMACK_HOLD = (1 << 6), + ATA_TIMING_CYCLE = (1 << 7), + ATA_TIMING_UDMA = (1 << 8), ATA_TIMING_ALL = ATA_TIMING_SETUP | ATA_TIMING_ACT8B | ATA_TIMING_REC8B | ATA_TIMING_CYC8B | ATA_TIMING_ACTIVE | ATA_TIMING_RECOVER | - ATA_TIMING_CYCLE | ATA_TIMING_UDMA, + ATA_TIMING_DMACK_HOLD | ATA_TIMING_CYCLE | + ATA_TIMING_UDMA, }; enum ata_xfer_mask { @@ -866,6 +868,7 @@ struct ata_timing { unsigned short cyc8b; /* t0 for 8-bit I/O */ unsigned short active; /* t2 or tD */ unsigned short recover; /* t2i or tK */ + unsigned short dmack_hold; /* tj */ unsigned short cycle; /* t0 */ unsigned short udma; /* t2CYCTYP/2 */ }; -- cgit From 3c929c6f5aa7501790586a38dd8faca8fed9a158 Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 15 Jan 2009 17:45:32 -0800 Subject: libata: New driver for OCTEON SOC Compact Flash interface (v7). Cavium OCTEON processor support was recently merged, so now we have this CF driver for your consideration. Most OCTEON variants have *no* DMA or interrupt support on the CF interface so for these, only PIO is supported. Although if DMA is available, we do take advantage of it. 
Signed-off-by: David Daney Signed-off-by: Jeff Garzik --- drivers/ata/Kconfig | 9 + drivers/ata/Makefile | 1 + drivers/ata/pata_octeon_cf.c | 965 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 975 insertions(+) create mode 100644 drivers/ata/pata_octeon_cf.c diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 1a7be96d627..503a908afc8 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -698,6 +698,15 @@ config PATA_IXP4XX_CF If unsure, say N. +config PATA_OCTEON_CF + tristate "OCTEON Boot Bus Compact Flash support" + depends on CPU_CAVIUM_OCTEON + help + This option enables a polled compact flash driver for use with + compact flash cards attached to the OCTEON boot bus. + + If unsure, say N. + config PATA_SCC tristate "Toshiba's Cell Reference Set IDE support" depends on PCI && PPC_CELLEB diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile index 674965fa326..7f1ecf99528 100644 --- a/drivers/ata/Makefile +++ b/drivers/ata/Makefile @@ -69,6 +69,7 @@ obj-$(CONFIG_PATA_IXP4XX_CF) += pata_ixp4xx_cf.o obj-$(CONFIG_PATA_SCC) += pata_scc.o obj-$(CONFIG_PATA_SCH) += pata_sch.o obj-$(CONFIG_PATA_BF54X) += pata_bf54x.o +obj-$(CONFIG_PATA_OCTEON_CF) += pata_octeon_cf.o obj-$(CONFIG_PATA_PLATFORM) += pata_platform.o obj-$(CONFIG_PATA_OF_PLATFORM) += pata_of_platform.o obj-$(CONFIG_PATA_ICSIDE) += pata_icside.o diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c new file mode 100644 index 00000000000..0fe4ef309c6 --- /dev/null +++ b/drivers/ata/pata_octeon_cf.c @@ -0,0 +1,965 @@ +/* + * Driver for the Octeon bootbus compact flash. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + * Copyright (C) 2005 - 2009 Cavium Networks + * Copyright (C) 2008 Wind River Systems + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * The Octeon bootbus compact flash interface is connected in at least + * 3 different configurations on various evaluation boards: + * + * -- 8 bits no irq, no DMA + * -- 16 bits no irq, no DMA + * -- 16 bits True IDE mode with DMA, but no irq. + * + * In the last case the DMA engine can generate an interrupt when the + * transfer is complete. For the first two cases only PIO is supported. + * + */ + +#define DRV_NAME "pata_octeon_cf" +#define DRV_VERSION "2.1" + + +struct octeon_cf_port { + struct workqueue_struct *wq; + struct delayed_work delayed_finish; + struct ata_port *ap; + int dma_finished; +}; + +static struct scsi_host_template octeon_cf_sht = { + ATA_PIO_SHT(DRV_NAME), +}; + +/** + * Convert nanosecond based time to setting used in the + * boot bus timing register, based on timing multiple + */ +static unsigned int ns_to_tim_reg(unsigned int tim_mult, unsigned int nsecs) +{ + unsigned int val; + + /* + * Compute # of eclock periods to get desired duration in + * nanoseconds. 
+ */ + val = DIV_ROUND_UP(nsecs * (octeon_get_clock_rate() / 1000000), + 1000 * tim_mult); + + return val; +} + +static void octeon_cf_set_boot_reg_cfg(int cs) +{ + union cvmx_mio_boot_reg_cfgx reg_cfg; + reg_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(cs)); + reg_cfg.s.dmack = 0; /* Don't assert DMACK on access */ + reg_cfg.s.tim_mult = 2; /* Timing mutiplier 2x */ + reg_cfg.s.rd_dly = 0; /* Sample on falling edge of BOOT_OE */ + reg_cfg.s.sam = 0; /* Don't combine write and output enable */ + reg_cfg.s.we_ext = 0; /* No write enable extension */ + reg_cfg.s.oe_ext = 0; /* No read enable extension */ + reg_cfg.s.en = 1; /* Enable this region */ + reg_cfg.s.orbit = 0; /* Don't combine with previous region */ + reg_cfg.s.ale = 0; /* Don't do address multiplexing */ + cvmx_write_csr(CVMX_MIO_BOOT_REG_CFGX(cs), reg_cfg.u64); +} + +/** + * Called after libata determines the needed PIO mode. This + * function programs the Octeon bootbus regions to support the + * timing requirements of the PIO mode. + * + * @ap: ATA port information + * @dev: ATA device + */ +static void octeon_cf_set_piomode(struct ata_port *ap, struct ata_device *dev) +{ + struct octeon_cf_data *ocd = ap->dev->platform_data; + union cvmx_mio_boot_reg_timx reg_tim; + int cs = ocd->base_region; + int T; + struct ata_timing timing; + + int use_iordy; + int trh; + int pause; + /* These names are timing parameters from the ATA spec */ + int t1; + int t2; + int t2i; + + T = (int)(2000000000000LL / octeon_get_clock_rate()); + + if (ata_timing_compute(dev, dev->pio_mode, &timing, T, T)) + BUG(); + + t1 = timing.setup; + if (t1) + t1--; + t2 = timing.active; + if (t2) + t2--; + t2i = timing.act8b; + if (t2i) + t2i--; + + trh = ns_to_tim_reg(2, 20); + if (trh) + trh--; + + pause = timing.cycle - timing.active - timing.setup - trh; + if (pause) + pause--; + + octeon_cf_set_boot_reg_cfg(cs); + if (ocd->dma_engine >= 0) + /* True IDE mode, program both chip selects. 
*/ + octeon_cf_set_boot_reg_cfg(cs + 1); + + + use_iordy = ata_pio_need_iordy(dev); + + reg_tim.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_TIMX(cs)); + /* Disable page mode */ + reg_tim.s.pagem = 0; + /* Enable dynamic timing */ + reg_tim.s.waitm = use_iordy; + /* Pages are disabled */ + reg_tim.s.pages = 0; + /* We don't use multiplexed address mode */ + reg_tim.s.ale = 0; + /* Not used */ + reg_tim.s.page = 0; + /* Time after IORDY to coninue to assert the data */ + reg_tim.s.wait = 0; + /* Time to wait to complete the cycle. */ + reg_tim.s.pause = pause; + /* How long to hold after a write to de-assert CE. */ + reg_tim.s.wr_hld = trh; + /* How long to wait after a read to de-assert CE. */ + reg_tim.s.rd_hld = trh; + /* How long write enable is asserted */ + reg_tim.s.we = t2; + /* How long read enable is asserted */ + reg_tim.s.oe = t2; + /* Time after CE that read/write starts */ + reg_tim.s.ce = ns_to_tim_reg(2, 5); + /* Time before CE that address is valid */ + reg_tim.s.adr = 0; + + /* Program the bootbus region timing for the data port chip select. */ + cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cs), reg_tim.u64); + if (ocd->dma_engine >= 0) + /* True IDE mode, program both chip selects. 
*/ + cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cs + 1), reg_tim.u64); +} + +static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev) +{ + struct octeon_cf_data *ocd = dev->link->ap->dev->platform_data; + union cvmx_mio_boot_dma_timx dma_tim; + unsigned int oe_a; + unsigned int oe_n; + unsigned int dma_ackh; + unsigned int dma_arq; + unsigned int pause; + unsigned int T0, Tkr, Td; + unsigned int tim_mult; + + const struct ata_timing *timing; + + timing = ata_timing_find_mode(dev->dma_mode); + T0 = timing->cycle; + Td = timing->active; + Tkr = timing->recover; + dma_ackh = timing->dmack_hold; + + dma_tim.u64 = 0; + /* dma_tim.s.tim_mult = 0 --> 4x */ + tim_mult = 4; + + /* not spec'ed, value in eclocks, not affected by tim_mult */ + dma_arq = 8; + pause = 25 - dma_arq * 1000 / + (octeon_get_clock_rate() / 1000000); /* Tz */ + + oe_a = Td; + /* Tkr from cf spec, lengthened to meet T0 */ + oe_n = max(T0 - oe_a, Tkr); + + dma_tim.s.dmack_pi = 1; + + dma_tim.s.oe_n = ns_to_tim_reg(tim_mult, oe_n); + dma_tim.s.oe_a = ns_to_tim_reg(tim_mult, oe_a); + + /* + * This is tI, C.F. spec. says 0, but Sony CF card requires + * more, we use 20 nS. + */ + dma_tim.s.dmack_s = ns_to_tim_reg(tim_mult, 20);; + dma_tim.s.dmack_h = ns_to_tim_reg(tim_mult, dma_ackh); + + dma_tim.s.dmarq = dma_arq; + dma_tim.s.pause = ns_to_tim_reg(tim_mult, pause); + + dma_tim.s.rd_dly = 0; /* Sample right on edge */ + + /* writes only */ + dma_tim.s.we_n = ns_to_tim_reg(tim_mult, oe_n); + dma_tim.s.we_a = ns_to_tim_reg(tim_mult, oe_a); + + pr_debug("ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60, + ns_to_tim_reg(tim_mult, 60)); + pr_debug("oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: " + "%d, dmarq: %d, pause: %d\n", + dma_tim.s.oe_n, dma_tim.s.oe_a, dma_tim.s.dmack_s, + dma_tim.s.dmack_h, dma_tim.s.dmarq, dma_tim.s.pause); + + cvmx_write_csr(CVMX_MIO_BOOT_DMA_TIMX(ocd->dma_engine), + dma_tim.u64); + +} + +/** + * Handle an 8 bit I/O request. 
+ * + * @dev: Device to access + * @buffer: Data buffer + * @buflen: Length of the buffer. + * @rw: True to write. + */ +static unsigned int octeon_cf_data_xfer8(struct ata_device *dev, + unsigned char *buffer, + unsigned int buflen, + int rw) +{ + struct ata_port *ap = dev->link->ap; + void __iomem *data_addr = ap->ioaddr.data_addr; + unsigned long words; + int count; + + words = buflen; + if (rw) { + count = 16; + while (words--) { + iowrite8(*buffer, data_addr); + buffer++; + /* + * Every 16 writes do a read so the bootbus + * FIFO doesn't fill up. + */ + if (--count == 0) { + ioread8(ap->ioaddr.altstatus_addr); + count = 16; + } + } + } else { + ioread8_rep(data_addr, buffer, words); + } + return buflen; +} + +/** + * Handle a 16 bit I/O request. + * + * @dev: Device to access + * @buffer: Data buffer + * @buflen: Length of the buffer. + * @rw: True to write. + */ +static unsigned int octeon_cf_data_xfer16(struct ata_device *dev, + unsigned char *buffer, + unsigned int buflen, + int rw) +{ + struct ata_port *ap = dev->link->ap; + void __iomem *data_addr = ap->ioaddr.data_addr; + unsigned long words; + int count; + + words = buflen / 2; + if (rw) { + count = 16; + while (words--) { + iowrite16(*(uint16_t *)buffer, data_addr); + buffer += sizeof(uint16_t); + /* + * Every 16 writes do a read so the bootbus + * FIFO doesn't fill up. + */ + if (--count == 0) { + ioread8(ap->ioaddr.altstatus_addr); + count = 16; + } + } + } else { + while (words--) { + *(uint16_t *)buffer = ioread16(data_addr); + buffer += sizeof(uint16_t); + } + } + /* Transfer trailing 1 byte, if any. */ + if (unlikely(buflen & 0x01)) { + __le16 align_buf[1] = { 0 }; + + if (rw == READ) { + align_buf[0] = cpu_to_le16(ioread16(data_addr)); + memcpy(buffer, align_buf, 1); + } else { + memcpy(align_buf, buffer, 1); + iowrite16(le16_to_cpu(align_buf[0]), data_addr); + } + words++; + } + return buflen; +} + +/** + * Read the taskfile for 16bit non-True IDE only. 
+ */ +static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf) +{ + u16 blob; + /* The base of the registers is at ioaddr.data_addr. */ + void __iomem *base = ap->ioaddr.data_addr; + + blob = __raw_readw(base + 0xc); + tf->feature = blob >> 8; + + blob = __raw_readw(base + 2); + tf->nsect = blob & 0xff; + tf->lbal = blob >> 8; + + blob = __raw_readw(base + 4); + tf->lbam = blob & 0xff; + tf->lbah = blob >> 8; + + blob = __raw_readw(base + 6); + tf->device = blob & 0xff; + tf->command = blob >> 8; + + if (tf->flags & ATA_TFLAG_LBA48) { + if (likely(ap->ioaddr.ctl_addr)) { + iowrite8(tf->ctl | ATA_HOB, ap->ioaddr.ctl_addr); + + blob = __raw_readw(base + 0xc); + tf->hob_feature = blob >> 8; + + blob = __raw_readw(base + 2); + tf->hob_nsect = blob & 0xff; + tf->hob_lbal = blob >> 8; + + blob = __raw_readw(base + 4); + tf->hob_lbam = blob & 0xff; + tf->hob_lbah = blob >> 8; + + iowrite8(tf->ctl, ap->ioaddr.ctl_addr); + ap->last_ctl = tf->ctl; + } else { + WARN_ON(1); + } + } +} + +static u8 octeon_cf_check_status16(struct ata_port *ap) +{ + u16 blob; + void __iomem *base = ap->ioaddr.data_addr; + + blob = __raw_readw(base + 6); + return blob >> 8; +} + +static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes, + unsigned long deadline) +{ + struct ata_port *ap = link->ap; + void __iomem *base = ap->ioaddr.data_addr; + int rc; + u8 err; + + DPRINTK("about to softreset\n"); + __raw_writew(ap->ctl, base + 0xe); + udelay(20); + __raw_writew(ap->ctl | ATA_SRST, base + 0xe); + udelay(20); + __raw_writew(ap->ctl, base + 0xe); + + rc = ata_sff_wait_after_reset(link, 1, deadline); + if (rc) { + ata_link_printk(link, KERN_ERR, "SRST failed (errno=%d)\n", rc); + return rc; + } + + /* determine by signature whether we have ATA or ATAPI devices */ + classes[0] = ata_sff_dev_classify(&link->device[0], 1, &err); + DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]); + return 0; +} + +/** + * Load the taskfile for 16bit non-True IDE 
only. The device_addr is + * not loaded, we do this as part of octeon_cf_exec_command16. + */ +static void octeon_cf_tf_load16(struct ata_port *ap, + const struct ata_taskfile *tf) +{ + unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR; + /* The base of the registers is at ioaddr.data_addr. */ + void __iomem *base = ap->ioaddr.data_addr; + + if (tf->ctl != ap->last_ctl) { + iowrite8(tf->ctl, ap->ioaddr.ctl_addr); + ap->last_ctl = tf->ctl; + ata_wait_idle(ap); + } + if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) { + __raw_writew(tf->hob_feature << 8, base + 0xc); + __raw_writew(tf->hob_nsect | tf->hob_lbal << 8, base + 2); + __raw_writew(tf->hob_lbam | tf->hob_lbah << 8, base + 4); + VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", + tf->hob_feature, + tf->hob_nsect, + tf->hob_lbal, + tf->hob_lbam, + tf->hob_lbah); + } + if (is_addr) { + __raw_writew(tf->feature << 8, base + 0xc); + __raw_writew(tf->nsect | tf->lbal << 8, base + 2); + __raw_writew(tf->lbam | tf->lbah << 8, base + 4); + VPRINTK("feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n", + tf->feature, + tf->nsect, + tf->lbal, + tf->lbam, + tf->lbah); + } + ata_wait_idle(ap); +} + + +static void octeon_cf_dev_select(struct ata_port *ap, unsigned int device) +{ +/* There is only one device, do nothing. */ + return; +} + +/* + * Issue ATA command to host controller. The device_addr is also sent + * as it must be written in a combined write with the command. + */ +static void octeon_cf_exec_command16(struct ata_port *ap, + const struct ata_taskfile *tf) +{ + /* The base of the registers is at ioaddr.data_addr. 
*/ + void __iomem *base = ap->ioaddr.data_addr; + u16 blob; + + if (tf->flags & ATA_TFLAG_DEVICE) { + VPRINTK("device 0x%X\n", tf->device); + blob = tf->device; + } else { + blob = 0; + } + + DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command); + blob |= (tf->command << 8); + __raw_writew(blob, base + 6); + + + ata_wait_idle(ap); +} + +static u8 octeon_cf_irq_on(struct ata_port *ap) +{ + return 0; +} + +static void octeon_cf_irq_clear(struct ata_port *ap) +{ + return; +} + +static void octeon_cf_dma_setup(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + struct octeon_cf_port *cf_port; + + cf_port = (struct octeon_cf_port *)ap->private_data; + DPRINTK("ENTER\n"); + /* issue r/w command */ + qc->cursg = qc->sg; + cf_port->dma_finished = 0; + ap->ops->sff_exec_command(ap, &qc->tf); + DPRINTK("EXIT\n"); +} + +/** + * Start a DMA transfer that was already setup + * + * @qc: Information about the DMA + */ +static void octeon_cf_dma_start(struct ata_queued_cmd *qc) +{ + struct octeon_cf_data *ocd = qc->ap->dev->platform_data; + union cvmx_mio_boot_dma_cfgx mio_boot_dma_cfg; + union cvmx_mio_boot_dma_intx mio_boot_dma_int; + struct scatterlist *sg; + + VPRINTK("%d scatterlists\n", qc->n_elem); + + /* Get the scatter list entry we need to DMA into */ + sg = qc->cursg; + BUG_ON(!sg); + + /* + * Clear the DMA complete status. + */ + mio_boot_dma_int.u64 = 0; + mio_boot_dma_int.s.done = 1; + cvmx_write_csr(CVMX_MIO_BOOT_DMA_INTX(ocd->dma_engine), + mio_boot_dma_int.u64); + + /* Enable the interrupt. */ + cvmx_write_csr(CVMX_MIO_BOOT_DMA_INT_ENX(ocd->dma_engine), + mio_boot_dma_int.u64); + + /* Set the direction of the DMA */ + mio_boot_dma_cfg.u64 = 0; + mio_boot_dma_cfg.s.en = 1; + mio_boot_dma_cfg.s.rw = ((qc->tf.flags & ATA_TFLAG_WRITE) != 0); + + /* + * Don't stop the DMA if the device deasserts DMARQ. Many + * compact flashes deassert DMARQ for a short time between + * sectors. 
Instead of stopping and restarting the DMA, we'll + * let the hardware do it. If the DMA is really stopped early + * due to an error condition, a later timeout will force us to + * stop. + */ + mio_boot_dma_cfg.s.clr = 0; + + /* Size is specified in 16bit words and minus one notation */ + mio_boot_dma_cfg.s.size = sg_dma_len(sg) / 2 - 1; + + /* We need to swap the high and low bytes of every 16 bits */ + mio_boot_dma_cfg.s.swap8 = 1; + + mio_boot_dma_cfg.s.adr = sg_dma_address(sg); + + VPRINTK("%s %d bytes address=%p\n", + (mio_boot_dma_cfg.s.rw) ? "write" : "read", sg->length, + (void *)(unsigned long)mio_boot_dma_cfg.s.adr); + + cvmx_write_csr(CVMX_MIO_BOOT_DMA_CFGX(ocd->dma_engine), + mio_boot_dma_cfg.u64); +} + +/** + * + * LOCKING: + * spin_lock_irqsave(host lock) + * + */ +static unsigned int octeon_cf_dma_finished(struct ata_port *ap, + struct ata_queued_cmd *qc) +{ + struct ata_eh_info *ehi = &ap->link.eh_info; + struct octeon_cf_data *ocd = ap->dev->platform_data; + union cvmx_mio_boot_dma_cfgx dma_cfg; + union cvmx_mio_boot_dma_intx dma_int; + struct octeon_cf_port *cf_port; + u8 status; + + VPRINTK("ata%u: protocol %d task_state %d\n", + ap->print_id, qc->tf.protocol, ap->hsm_task_state); + + + if (ap->hsm_task_state != HSM_ST_LAST) + return 0; + + cf_port = (struct octeon_cf_port *)ap->private_data; + + dma_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_DMA_CFGX(ocd->dma_engine)); + if (dma_cfg.s.size != 0xfffff) { + /* Error, the transfer was not complete. */ + qc->err_mask |= AC_ERR_HOST_BUS; + ap->hsm_task_state = HSM_ST_ERR; + } + + /* Stop and clear the dma engine. */ + dma_cfg.u64 = 0; + dma_cfg.s.size = -1; + cvmx_write_csr(CVMX_MIO_BOOT_DMA_CFGX(ocd->dma_engine), dma_cfg.u64); + + /* Disable the interrupt. 
*/ + dma_int.u64 = 0; + cvmx_write_csr(CVMX_MIO_BOOT_DMA_INT_ENX(ocd->dma_engine), dma_int.u64); + + /* Clear the DMA complete status */ + dma_int.s.done = 1; + cvmx_write_csr(CVMX_MIO_BOOT_DMA_INTX(ocd->dma_engine), dma_int.u64); + + status = ap->ops->sff_check_status(ap); + + ata_sff_hsm_move(ap, qc, status, 0); + + if (unlikely(qc->err_mask) && (qc->tf.protocol == ATA_PROT_DMA)) + ata_ehi_push_desc(ehi, "DMA stat 0x%x", status); + + return 1; +} + +/* + * Check if any queued commands have more DMAs, if so start the next + * transfer, else do end of transfer handling. + */ +static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance) +{ + struct ata_host *host = dev_instance; + struct octeon_cf_port *cf_port; + int i; + unsigned int handled = 0; + unsigned long flags; + + spin_lock_irqsave(&host->lock, flags); + + DPRINTK("ENTER\n"); + for (i = 0; i < host->n_ports; i++) { + u8 status; + struct ata_port *ap; + struct ata_queued_cmd *qc; + union cvmx_mio_boot_dma_intx dma_int; + union cvmx_mio_boot_dma_cfgx dma_cfg; + struct octeon_cf_data *ocd; + + ap = host->ports[i]; + ocd = ap->dev->platform_data; + if (!ap || (ap->flags & ATA_FLAG_DISABLED)) + continue; + + ocd = ap->dev->platform_data; + cf_port = (struct octeon_cf_port *)ap->private_data; + dma_int.u64 = + cvmx_read_csr(CVMX_MIO_BOOT_DMA_INTX(ocd->dma_engine)); + dma_cfg.u64 = + cvmx_read_csr(CVMX_MIO_BOOT_DMA_CFGX(ocd->dma_engine)); + + qc = ata_qc_from_tag(ap, ap->link.active_tag); + + if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) && + (qc->flags & ATA_QCFLAG_ACTIVE)) { + if (dma_int.s.done && !dma_cfg.s.en) { + if (!sg_is_last(qc->cursg)) { + qc->cursg = sg_next(qc->cursg); + handled = 1; + octeon_cf_dma_start(qc); + continue; + } else { + cf_port->dma_finished = 1; + } + } + if (!cf_port->dma_finished) + continue; + status = ioread8(ap->ioaddr.altstatus_addr); + if (status & (ATA_BUSY | ATA_DRQ)) { + /* + * We are busy, try to handle it + * later. 
This is the DMA finished + * interrupt, and it could take a + * little while for the card to be + * ready for more commands. + */ + /* Clear DMA irq. */ + dma_int.u64 = 0; + dma_int.s.done = 1; + cvmx_write_csr(CVMX_MIO_BOOT_DMA_INTX(ocd->dma_engine), + dma_int.u64); + + queue_delayed_work(cf_port->wq, + &cf_port->delayed_finish, 1); + handled = 1; + } else { + handled |= octeon_cf_dma_finished(ap, qc); + } + } + } + spin_unlock_irqrestore(&host->lock, flags); + DPRINTK("EXIT\n"); + return IRQ_RETVAL(handled); +} + +static void octeon_cf_delayed_finish(struct work_struct *work) +{ + struct octeon_cf_port *cf_port = container_of(work, + struct octeon_cf_port, + delayed_finish.work); + struct ata_port *ap = cf_port->ap; + struct ata_host *host = ap->host; + struct ata_queued_cmd *qc; + unsigned long flags; + u8 status; + + spin_lock_irqsave(&host->lock, flags); + + /* + * If the port is not waiting for completion, it must have + * handled it previously. The hsm_task_state is + * protected by host->lock. + */ + if (ap->hsm_task_state != HSM_ST_LAST || !cf_port->dma_finished) + goto out; + + status = ioread8(ap->ioaddr.altstatus_addr); + if (status & (ATA_BUSY | ATA_DRQ)) { + /* Still busy, try again. */ + queue_delayed_work(cf_port->wq, + &cf_port->delayed_finish, 1); + goto out; + } + qc = ata_qc_from_tag(ap, ap->link.active_tag); + if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) && + (qc->flags & ATA_QCFLAG_ACTIVE)) + octeon_cf_dma_finished(ap, qc); +out: + spin_unlock_irqrestore(&host->lock, flags); +} + +static void octeon_cf_dev_config(struct ata_device *dev) +{ + /* + * A maximum of 2^20 - 1 16 bit transfers are possible with + * the bootbus DMA. So we need to throttle max_sectors to + * (2^12 - 1 == 4095) to assure that this can never happen. + */ + dev->max_sectors = min(dev->max_sectors, 4095U); +} + +/* + * Trap if driver tries to do standard bmdma commands. They are not + * supported. 
+ */ +static void unreachable_qc(struct ata_queued_cmd *qc) +{ + BUG(); +} + +static u8 unreachable_port(struct ata_port *ap) +{ + BUG(); +} + +/* + * We don't do ATAPI DMA so return 0. + */ +static int octeon_cf_check_atapi_dma(struct ata_queued_cmd *qc) +{ + return 0; +} + +static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc) +{ + struct ata_port *ap = qc->ap; + + switch (qc->tf.protocol) { + case ATA_PROT_DMA: + WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING); + + ap->ops->sff_tf_load(ap, &qc->tf); /* load tf registers */ + octeon_cf_dma_setup(qc); /* set up dma */ + octeon_cf_dma_start(qc); /* initiate dma */ + ap->hsm_task_state = HSM_ST_LAST; + break; + + case ATAPI_PROT_DMA: + dev_err(ap->dev, "Error, ATAPI not supported\n"); + BUG(); + + default: + return ata_sff_qc_issue(qc); + } + + return 0; +} + +static struct ata_port_operations octeon_cf_ops = { + .inherits = &ata_sff_port_ops, + .check_atapi_dma = octeon_cf_check_atapi_dma, + .qc_prep = ata_noop_qc_prep, + .qc_issue = octeon_cf_qc_issue, + .sff_dev_select = octeon_cf_dev_select, + .sff_irq_on = octeon_cf_irq_on, + .sff_irq_clear = octeon_cf_irq_clear, + .bmdma_setup = unreachable_qc, + .bmdma_start = unreachable_qc, + .bmdma_stop = unreachable_qc, + .bmdma_status = unreachable_port, + .cable_detect = ata_cable_40wire, + .set_piomode = octeon_cf_set_piomode, + .set_dmamode = octeon_cf_set_dmamode, + .dev_config = octeon_cf_dev_config, +}; + +static int __devinit octeon_cf_probe(struct platform_device *pdev) +{ + struct resource *res_cs0, *res_cs1; + + void __iomem *cs0; + void __iomem *cs1 = NULL; + struct ata_host *host; + struct ata_port *ap; + struct octeon_cf_data *ocd; + int irq = 0; + irq_handler_t irq_handler = NULL; + void __iomem *base; + struct octeon_cf_port *cf_port; + + res_cs0 = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!res_cs0) + return -EINVAL; + + ocd = pdev->dev.platform_data; + + cs0 = devm_ioremap_nocache(&pdev->dev, res_cs0->start, + res_cs0->end - 
res_cs0->start + 1); + + if (!cs0) + return -ENOMEM; + + /* Determine from availability of DMA if True IDE mode or not */ + if (ocd->dma_engine >= 0) { + res_cs1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res_cs1) + return -EINVAL; + + cs1 = devm_ioremap_nocache(&pdev->dev, res_cs1->start, + res_cs0->end - res_cs1->start + 1); + + if (!cs1) + return -ENOMEM; + } + + cf_port = kzalloc(sizeof(*cf_port), GFP_KERNEL); + if (!cf_port) + return -ENOMEM; + + /* allocate host */ + host = ata_host_alloc(&pdev->dev, 1); + if (!host) + goto free_cf_port; + + ap = host->ports[0]; + ap->private_data = cf_port; + cf_port->ap = ap; + ap->ops = &octeon_cf_ops; + ap->pio_mask = 0x7f; /* Support PIO 0-6 */ + ap->flags |= ATA_FLAG_MMIO | ATA_FLAG_NO_LEGACY + | ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING; + + base = cs0 + ocd->base_region_bias; + if (!ocd->is16bit) { + ap->ioaddr.cmd_addr = base; + ata_sff_std_ports(&ap->ioaddr); + + ap->ioaddr.altstatus_addr = base + 0xe; + ap->ioaddr.ctl_addr = base + 0xe; + octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer8; + } else if (cs1) { + /* Presence of cs1 indicates True IDE mode. 
*/ + ap->ioaddr.cmd_addr = base + (ATA_REG_CMD << 1) + 1; + ap->ioaddr.data_addr = base + (ATA_REG_DATA << 1); + ap->ioaddr.error_addr = base + (ATA_REG_ERR << 1) + 1; + ap->ioaddr.feature_addr = base + (ATA_REG_FEATURE << 1) + 1; + ap->ioaddr.nsect_addr = base + (ATA_REG_NSECT << 1) + 1; + ap->ioaddr.lbal_addr = base + (ATA_REG_LBAL << 1) + 1; + ap->ioaddr.lbam_addr = base + (ATA_REG_LBAM << 1) + 1; + ap->ioaddr.lbah_addr = base + (ATA_REG_LBAH << 1) + 1; + ap->ioaddr.device_addr = base + (ATA_REG_DEVICE << 1) + 1; + ap->ioaddr.status_addr = base + (ATA_REG_STATUS << 1) + 1; + ap->ioaddr.command_addr = base + (ATA_REG_CMD << 1) + 1; + ap->ioaddr.altstatus_addr = cs1 + (6 << 1) + 1; + ap->ioaddr.ctl_addr = cs1 + (6 << 1) + 1; + octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer16; + + ap->mwdma_mask = 0x1f; /* Support MWDMA 0-4 */ + irq = platform_get_irq(pdev, 0); + irq_handler = octeon_cf_interrupt; + + /* True IDE mode needs delayed work to poll for not-busy. */ + cf_port->wq = create_singlethread_workqueue(DRV_NAME); + if (!cf_port->wq) + goto free_cf_port; + INIT_DELAYED_WORK(&cf_port->delayed_finish, + octeon_cf_delayed_finish); + + } else { + /* 16 bit but not True IDE */ + octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer16; + octeon_cf_ops.softreset = octeon_cf_softreset16; + octeon_cf_ops.sff_check_status = octeon_cf_check_status16; + octeon_cf_ops.sff_tf_read = octeon_cf_tf_read16; + octeon_cf_ops.sff_tf_load = octeon_cf_tf_load16; + octeon_cf_ops.sff_exec_command = octeon_cf_exec_command16; + + ap->ioaddr.data_addr = base + ATA_REG_DATA; + ap->ioaddr.nsect_addr = base + ATA_REG_NSECT; + ap->ioaddr.lbal_addr = base + ATA_REG_LBAL; + ap->ioaddr.ctl_addr = base + 0xe; + ap->ioaddr.altstatus_addr = base + 0xe; + } + + ata_port_desc(ap, "cmd %p ctl %p", base, ap->ioaddr.ctl_addr); + + + dev_info(&pdev->dev, "version " DRV_VERSION" %d bit%s.\n", + (ocd->is16bit) ? 16 : 8, + (cs1) ? 
", True IDE" : ""); + + + return ata_host_activate(host, irq, irq_handler, 0, &octeon_cf_sht); + +free_cf_port: + kfree(cf_port); + return -ENOMEM; +} + +static struct platform_driver octeon_cf_driver = { + .probe = octeon_cf_probe, + .driver = { + .name = DRV_NAME, + .owner = THIS_MODULE, + }, +}; + +static int __init octeon_cf_init(void) +{ + return platform_driver_register(&octeon_cf_driver); +} + + +MODULE_AUTHOR("David Daney "); +MODULE_DESCRIPTION("low-level driver for Cavium OCTEON Compact Flash PATA"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); +MODULE_ALIAS("platform:" DRV_NAME); + +module_init(octeon_cf_init); -- cgit From bc42b24e6ef01ca7b23fafee7237882d27031614 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 14 Jan 2009 16:48:42 -0800 Subject: drivers/ata/pata_ali.c: s/isa_bridge/ali_isa_bridge/ to fix alpha build drivers/ata/pata_ali.c:44: error: static declaration of 'isa_bridge' follows non-static declaration arch/alpha/include/asm/pci.h:274: error: previous declaration of 'isa_bridge' was here Cc: Alan Cox Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/ata/pata_ali.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c index a7999c19f0c..eb99dbe7808 100644 --- a/drivers/ata/pata_ali.c +++ b/drivers/ata/pata_ali.c @@ -41,7 +41,7 @@ static int ali_atapi_dma = 0; module_param_named(atapi_dma, ali_atapi_dma, int, 0644); MODULE_PARM_DESC(atapi_dma, "Enable ATAPI DMA (0=disable, 1=enable)"); -static struct pci_dev *isa_bridge; +static struct pci_dev *ali_isa_bridge; /* * Cable special cases @@ -346,13 +346,13 @@ static void ali_c2_c3_postreset(struct ata_link *link, unsigned int *classes) int port_bit = 4 << link->ap->port_no; /* If our bridge is an ALI 1533 then do the extra work */ - if (isa_bridge) { + if (ali_isa_bridge) { /* Tristate and re-enable the bus signals */ - pci_read_config_byte(isa_bridge, 
0x58, &r); + pci_read_config_byte(ali_isa_bridge, 0x58, &r); r &= ~port_bit; - pci_write_config_byte(isa_bridge, 0x58, r); + pci_write_config_byte(ali_isa_bridge, 0x58, r); r |= port_bit; - pci_write_config_byte(isa_bridge, 0x58, r); + pci_write_config_byte(ali_isa_bridge, 0x58, r); } ata_sff_postreset(link, classes); } @@ -467,14 +467,14 @@ static void ali_init_chipset(struct pci_dev *pdev) pci_write_config_byte(pdev, 0x53, tmp); } north = pci_get_bus_and_slot(0, PCI_DEVFN(0,0)); - if (north && north->vendor == PCI_VENDOR_ID_AL && isa_bridge) { + if (north && north->vendor == PCI_VENDOR_ID_AL && ali_isa_bridge) { /* Configure the ALi bridge logic. For non ALi rely on BIOS. Set the south bridge enable bit */ - pci_read_config_byte(isa_bridge, 0x79, &tmp); + pci_read_config_byte(ali_isa_bridge, 0x79, &tmp); if (pdev->revision == 0xC2) - pci_write_config_byte(isa_bridge, 0x79, tmp | 0x04); + pci_write_config_byte(ali_isa_bridge, 0x79, tmp | 0x04); else if (pdev->revision > 0xC2 && pdev->revision < 0xC5) - pci_write_config_byte(isa_bridge, 0x79, tmp | 0x02); + pci_write_config_byte(ali_isa_bridge, 0x79, tmp | 0x02); } pci_dev_put(north); ata_pci_bmdma_clear_simplex(pdev); @@ -571,9 +571,9 @@ static int ali_init_one(struct pci_dev *pdev, const struct pci_device_id *id) ali_init_chipset(pdev); - if (isa_bridge && pdev->revision >= 0x20 && pdev->revision < 0xC2) { + if (ali_isa_bridge && pdev->revision >= 0x20 && pdev->revision < 0xC2) { /* Are we paired with a UDMA capable chip */ - pci_read_config_byte(isa_bridge, 0x5E, &tmp); + pci_read_config_byte(ali_isa_bridge, 0x5E, &tmp); if ((tmp & 0x1E) == 0x12) ppi[0] = &info_20_udma; } @@ -617,11 +617,11 @@ static struct pci_driver ali_pci_driver = { static int __init ali_init(void) { int ret; - isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); + ali_isa_bridge = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); ret = pci_register_driver(&ali_pci_driver); if (ret < 0) - 
pci_dev_put(isa_bridge); + pci_dev_put(ali_isa_bridge); return ret; } @@ -629,7 +629,7 @@ static int __init ali_init(void) static void __exit ali_exit(void) { pci_unregister_driver(&ali_pci_driver); - pci_dev_put(isa_bridge); + pci_dev_put(ali_isa_bridge); } -- cgit From e4ac522bd7261829197a3d01d5feedb2aca8ae38 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Wed, 14 Jan 2009 14:02:38 +0100 Subject: sata_fsl: Return non-zero on error in probe() while I was looking over kernel sources I've found this small bug. Formerly, zero was returned even if an error happened. Signed-off-by: Michal Sojka Signed-off-by: Jeff Garzik --- drivers/ata/sata_fsl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c index 1a56db92ff7..55bc88c1707 100644 --- a/drivers/ata/sata_fsl.c +++ b/drivers/ata/sata_fsl.c @@ -1288,7 +1288,7 @@ static const struct ata_port_info sata_fsl_port_info[] = { static int sata_fsl_probe(struct of_device *ofdev, const struct of_device_id *match) { - int retval = 0; + int retval = -ENXIO; void __iomem *hcr_base = NULL; void __iomem *ssr_base = NULL; void __iomem *csr_base = NULL; -- cgit From 50246dd41ccbcb47beb06d6c1d9355f6b7137a11 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 16 Jan 2009 08:14:51 -0800 Subject: Revert "PCI PM: Register power state of devices during initialization" This reverts commit 98e6e286d7b01deb7453b717aa38ebb69d6cefc0, as Yinghai Lu reports that it breaks kexec with at least the e1000 and e1000e drivers. The reason is that the shutdown sequence puts the hardware into D3 sleep, and the commit causes us to claim that it then is in D0 (running) state just because we don't understand the PM capabilities. Which then later makes "pci_set_power_state()" not do anything, and the device never wakes up properly and just returns 0xff to everything. Reported-by: Yinghai Lu Acked-by: From: Rafael J. 
Wysocki Cc: Jesse Barnes Signed-off-by: Linus Torvalds --- drivers/pci/pci.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c12f6c79069..e491fdedf70 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1260,15 +1260,14 @@ void pci_pm_init(struct pci_dev *dev) /* find PCI PM capability in list */ pm = pci_find_capability(dev, PCI_CAP_ID_PM); if (!pm) - goto Exit; - + return; /* Check device's ability to generate PME# */ pci_read_config_word(dev, pm + PCI_PM_PMC, &pmc); if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { dev_err(&dev->dev, "unsupported PM cap regs version (%u)\n", pmc & PCI_PM_CAP_VER_MASK); - goto Exit; + return; } dev->pm_cap = pm; @@ -1307,9 +1306,6 @@ void pci_pm_init(struct pci_dev *dev) } else { dev->pme_support = 0; } - - Exit: - pci_update_current_state(dev, PCI_D0); } /** -- cgit From 1d9e2ae949411c2f329f30e01ea0355cd02c4296 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Jan 2009 11:58:19 -0500 Subject: Btrfs: Clear the device->running_pending flag before bailing on congestion Btrfs maintains a queue of async bio submissions so the checksumming threads don't have to wait on get_request_wait. In order to avoid extra wakeups, this code has a running_pending flag that is used to tell new submissions they don't need to wake the thread. When the threads notice congestion on a single device, they may decide to requeue the job and move on to other devices. This makes sure the running_pending flag is cleared before the job is requeued. It should help avoid IO stalls by making sure the task is woken up when new submissions come in. 
Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b187b537888..3451e1cca2b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -220,6 +220,7 @@ loop: tail->bi_next = old_head; else device->pending_bio_tail = tail; + device->running_pending = 0; spin_unlock(&device->io_lock); btrfs_requeue_work(&device->work); -- cgit From c071fcfdb60e7abbe95e02460005d6bca165bf24 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Fri, 16 Jan 2009 11:59:08 -0500 Subject: Btrfs: fix ioctl arg size (userland incompatible change!) The structure used to send device in btrfs ioctl calls was not properly aligned, and so 32 bit ioctls would not work properly on 64 bit kernels. We could fix this with compat ioctls, but we're just one byte away and it doesn't make sense at this stage to carry about the compat ioctls forever at this stage in the project. This patch brings the ioctl arg up to an evenly aligned 4k. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.h | 14 ++++++++------ fs/btrfs/super.c | 3 ++- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 78049ea208d..b320b103fa1 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -22,13 +22,20 @@ #define BTRFS_IOCTL_MAGIC 0x94 #define BTRFS_VOL_NAME_MAX 255 -#define BTRFS_PATH_NAME_MAX 3072 +#define BTRFS_PATH_NAME_MAX 4087 +/* this should be 4k */ struct btrfs_ioctl_vol_args { __s64 fd; char name[BTRFS_PATH_NAME_MAX + 1]; }; +struct btrfs_ioctl_clone_range_args { + __s64 src_fd; + __u64 src_offset, src_length; + __u64 dest_offset; +}; + #define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ struct btrfs_ioctl_vol_args) #define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ @@ -52,11 +59,6 @@ struct btrfs_ioctl_vol_args { struct btrfs_ioctl_vol_args) #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ struct btrfs_ioctl_vol_args) -struct btrfs_ioctl_clone_range_args { - __s64 
src_fd; - __u64 src_offset, src_length; - __u64 dest_offset; -}; #define BTRFS_IOC_CLONE_RANGE _IOW(BTRFS_IOCTL_MAGIC, 13, \ struct btrfs_ioctl_clone_range_args) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b4c101d9322..92c9b543def 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -582,7 +582,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, { struct btrfs_ioctl_vol_args *vol; struct btrfs_fs_devices *fs_devices; - int ret = 0; + int ret = -ENOTTY; int len; if (!capable(CAP_SYS_ADMIN)) @@ -594,6 +594,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd, goto out; } len = strnlen(vol->name, BTRFS_PATH_NAME_MAX); + switch (cmd) { case BTRFS_IOC_SCAN_DEV: ret = btrfs_scan_one_device(vol->name, FMODE_READ, -- cgit From 235c4a59278eb07e61d909f1f0c233733034a8b3 Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 Jan 2009 02:57:47 +0300 Subject: ACPI: EC: Limit workaround for ASUS notebooks even more References: http://bugzilla.kernel.org/show_bug.cgi?id=11884 Signed-off-by: Alexey Starikovskiy Signed-off-by: Len Brown --- drivers/acpi/ec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 8dfcbb8aff7..e0794264b9f 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1027,7 +1027,8 @@ int __init acpi_ec_ecdt_probe(void) * which needs it, has fake EC._INI method, so use it as flag. * Keep boot_ec struct as it will be needed soon. 
*/ - if (ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", &dummy))) + if (!dmi_name_in_vendors("ASUS") || + ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", &dummy))) return -ENODEV; install: if (!ec_install_handlers(boot_ec)) { -- cgit From 3bfafd6b136bea2de9bd96c01b7e3808635a15b2 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Fri, 16 Jan 2009 11:03:01 -0800 Subject: netxen: avoid invalid iounmap For NX3031 only one I/O range is mapped, so unmapping other two which are used by older chips, causes this warning on ppc64. "Attempt to iounmap early bolted mapping at 0x0000000000000000" Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index 86867405a36..f8e26290a22 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -1004,8 +1004,10 @@ static void __devexit netxen_nic_remove(struct pci_dev *pdev) iounmap(adapter->ahw.db_base); iounmap(adapter->ahw.pci_base0); - iounmap(adapter->ahw.pci_base1); - iounmap(adapter->ahw.pci_base2); + if (adapter->ahw.pci_base1 != NULL) + iounmap(adapter->ahw.pci_base1); + if (adapter->ahw.pci_base2 != NULL) + iounmap(adapter->ahw.pci_base2); pci_release_regions(pdev); pci_disable_device(pdev); -- cgit From c6cb0e878446c79f42e7833d7bb69ed6bfbb381f Mon Sep 17 00:00:00 2001 From: Alexey Starikovskiy Date: Wed, 14 Jan 2009 02:57:53 +0300 Subject: ACPI: EC: Don't trust ECDT tables from ASUS http://bugzilla.kernel.org/show_bug.cgi?id=9399 http://bugzilla.kernel.org/show_bug.cgi?id=11880 Signed-off-by: Alexey Starikovskiy Signed-off-by: Len Brown --- drivers/acpi/ec.c | 74 ++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 44 deletions(-) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index e0794264b9f..a2b82c90a68 100644 --- a/drivers/acpi/ec.c 
+++ b/drivers/acpi/ec.c @@ -120,31 +120,6 @@ static struct acpi_ec { spinlock_t curr_lock; } *boot_ec, *first_ec; -/* - * Some Asus system have exchanged ECDT data/command IO addresses. - */ -static int print_ecdt_error(const struct dmi_system_id *id) -{ - printk(KERN_NOTICE PREFIX "%s detected - " - "ECDT has exchanged control/data I/O address\n", - id->ident); - return 0; -} - -static struct dmi_system_id __cpuinitdata ec_dmi_table[] = { - { - print_ecdt_error, "Asus L4R", { - DMI_MATCH(DMI_BIOS_VERSION, "1008.006"), - DMI_MATCH(DMI_PRODUCT_NAME, "L4R"), - DMI_MATCH(DMI_BOARD_NAME, "L4R") }, NULL}, - { - print_ecdt_error, "Asus M6R", { - DMI_MATCH(DMI_BIOS_VERSION, "0207"), - DMI_MATCH(DMI_PRODUCT_NAME, "M6R"), - DMI_MATCH(DMI_BOARD_NAME, "M6R") }, NULL}, - {}, -}; - /* -------------------------------------------------------------------------- Transaction Management -------------------------------------------------------------------------- */ @@ -983,8 +958,8 @@ static const struct acpi_device_id ec_device_ids[] = { int __init acpi_ec_ecdt_probe(void) { acpi_status status; + struct acpi_ec *saved_ec = NULL; struct acpi_table_ecdt *ecdt_ptr; - acpi_handle dummy; boot_ec = make_acpi_ec(); if (!boot_ec) @@ -998,21 +973,16 @@ int __init acpi_ec_ecdt_probe(void) pr_info(PREFIX "EC description table is found, configuring boot EC\n"); boot_ec->command_addr = ecdt_ptr->control.address; boot_ec->data_addr = ecdt_ptr->data.address; - if (dmi_check_system(ec_dmi_table)) { - /* - * If the board falls into ec_dmi_table, it means - * that ECDT table gives the incorrect command/status - * & data I/O address. Just fix it. 
- */ - boot_ec->data_addr = ecdt_ptr->control.address; - boot_ec->command_addr = ecdt_ptr->data.address; - } boot_ec->gpe = ecdt_ptr->gpe; boot_ec->handle = ACPI_ROOT_OBJECT; acpi_get_handle(ACPI_ROOT_OBJECT, ecdt_ptr->id, &boot_ec->handle); - /* Add some basic check against completely broken table */ - if (boot_ec->data_addr != boot_ec->command_addr) + /* Don't trust ECDT, which comes from ASUSTek */ + if (!dmi_name_in_vendors("ASUS")) goto install; + saved_ec = kmalloc(sizeof(struct acpi_ec), GFP_KERNEL); + if (!saved_ec) + return -ENOMEM; + memcpy(&saved_ec, boot_ec, sizeof(saved_ec)); /* fall through */ } /* This workaround is needed only on some broken machines, @@ -1023,13 +993,29 @@ int __init acpi_ec_ecdt_probe(void) /* Check that acpi_get_devices actually find something */ if (ACPI_FAILURE(status) || !boot_ec->handle) goto error; - /* We really need to limit this workaround, the only ASUS, - * which needs it, has fake EC._INI method, so use it as flag. - * Keep boot_ec struct as it will be needed soon. - */ - if (!dmi_name_in_vendors("ASUS") || - ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", &dummy))) - return -ENODEV; + if (saved_ec) { + /* try to find good ECDT from ASUSTek */ + if (saved_ec->command_addr != boot_ec->command_addr || + saved_ec->data_addr != boot_ec->data_addr || + saved_ec->gpe != boot_ec->gpe || + saved_ec->handle != boot_ec->handle) + pr_info(PREFIX "ASUSTek keeps feeding us with broken " + "ECDT tables, which are very hard to workaround. " + "Trying to use DSDT EC info instead. Please send " + "output of acpidump to linux-acpi@vger.kernel.org\n"); + kfree(saved_ec); + saved_ec = NULL; + } else { + /* We really need to limit this workaround, the only ASUS, + * which needs it, has fake EC._INI method, so use it as flag. + * Keep boot_ec struct as it will be needed soon. 
+ */ + acpi_handle dummy; + if (!dmi_name_in_vendors("ASUS") || + ACPI_FAILURE(acpi_get_handle(boot_ec->handle, "_INI", + &dummy))) + return -ENODEV; + } install: if (!ec_install_handlers(boot_ec)) { first_ec = boot_ec; -- cgit From 009777846165fcc49352c0f1487e3a96102884c3 Mon Sep 17 00:00:00 2001 From: Dhananjay Phadke Date: Fri, 16 Jan 2009 11:03:25 -0800 Subject: netxen: include ipv6.h (fixes build failure) Fixes a build error in absence of CONFIG_IPV6: drivers/net/netxen/netxen_nic_main.c:1189: error: implicit declaration of function 'ipv6_hdr' drivers/net/netxen/netxen_nic_main.c:1189: error: invalid type argument of '->' Reported-by: Ingo Molnar Signed-off-by: Dhananjay Phadke Signed-off-by: David S. Miller --- drivers/net/netxen/netxen_nic_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c index f8e26290a22..d854f07ef4d 100644 --- a/drivers/net/netxen/netxen_nic_main.c +++ b/drivers/net/netxen/netxen_nic_main.c @@ -41,6 +41,7 @@ #include #include #include +#include MODULE_DESCRIPTION("NetXen Multi port (1/10) Gigabit Network Driver"); MODULE_LICENSE("GPL"); -- cgit From 0e4240d94628530a912d216cad1e32d2e3827327 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 16 Jan 2009 12:53:42 -0500 Subject: thermal fixup for broken BIOS which has invalid trip points. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ACPI thermal driver only re-evaluate VALID trip points. For the broken BIOS show in http://bugzilla.kernel.org/show_bug.cgi?id=8544 the active[0] is set to invalid at boot time and it will not be re-evaluated again. We can still get a single warning message at boot time. 
http://marc.info/?l=linux-kernel&m=120496222629983&w=2 http://bugzilla.kernel.org/show_bug.cgi?id=12203 Signed-off-by: Zhang Rui Tested-by: Márton Németh Signed-off-by: Len Brown --- drivers/acpi/thermal.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index 073ff09218a..99e6f1f8ea4 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -416,7 +416,8 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) } /* Passive (optional) */ - if (flag & ACPI_TRIPS_PASSIVE) { + if (((flag & ACPI_TRIPS_PASSIVE) && tz->trips.passive.flags.valid) || + (flag == ACPI_TRIPS_INIT)) { valid = tz->trips.passive.flags.valid; if (psv == -1) { status = AE_SUPPORT; @@ -462,8 +463,11 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) memset(&devices, 0, sizeof(struct acpi_handle_list)); status = acpi_evaluate_reference(tz->device->handle, "_PSL", NULL, &devices); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + printk(KERN_WARNING PREFIX + "Invalid passive threshold\n"); tz->trips.passive.flags.valid = 0; + } else tz->trips.passive.flags.valid = 1; @@ -487,7 +491,8 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) if (act == -1) break; /* disable all active trip points */ - if (flag & ACPI_TRIPS_ACTIVE) { + if ((flag == ACPI_TRIPS_INIT) || ((flag & ACPI_TRIPS_ACTIVE) && + tz->trips.active[i].flags.valid)) { status = acpi_evaluate_integer(tz->device->handle, name, NULL, &tmp); if (ACPI_FAILURE(status)) { @@ -521,8 +526,11 @@ static int acpi_thermal_trips_update(struct acpi_thermal *tz, int flag) memset(&devices, 0, sizeof(struct acpi_handle_list)); status = acpi_evaluate_reference(tz->device->handle, name, NULL, &devices); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status)) { + printk(KERN_WARNING PREFIX + "Invalid active%d threshold\n", i); tz->trips.active[i].flags.valid = 0; + } else 
tz->trips.active[i].flags.valid = 1; -- cgit From 33f1d7ecc6cffff3c618a02295de969ebbacd95d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 6 Jan 2009 21:14:04 +0100 Subject: PM: Fix freezer compilation if PM_SLEEP is unset Freezer fails to compile if with the following configuration settings: CONFIG_CGROUPS=y CONFIG_CGROUP_FREEZER=y CONFIG_MODULES=y CONFIG_FREEZER=y CONFIG_PM=y CONFIG_PM_SLEEP=n Fix this by making process.o compilation depend on CONFIG_FREEZER. Reported-by: Cheng Renquan Signed-off-by: Rafael J. Wysocki Acked-by: Pavel Machek Signed-off-by: Len Brown --- kernel/power/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/power/Makefile b/kernel/power/Makefile index 597823b5b70..d7a10167a25 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -4,7 +4,8 @@ EXTRA_CFLAGS += -DDEBUG endif obj-y := main.o -obj-$(CONFIG_PM_SLEEP) += process.o console.o +obj-$(CONFIG_PM_SLEEP) += console.o +obj-$(CONFIG_FREEZER) += process.o obj-$(CONFIG_HIBERNATION) += swsusp.o disk.o snapshot.o swap.o user.o obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o -- cgit From 5a4ccaf37ffece09ef33f1cfec67efa8ee56f967 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 6 Jan 2009 21:15:32 +0100 Subject: kprobes: check CONFIG_FREEZER instead of CONFIG_PM Check CONFIG_FREEZER instead of CONFIG_PM because kprobe booster depends on freeze_processes() and thaw_processes() when CONFIG_PREEMPT=y. This fixes a linkage error which occurs when CONFIG_PREEMPT=y, CONFIG_PM=y and CONFIG_FREEZER=n. Reported-by: Cheng Renquan Signed-off-by: Masami Hiramatsu Signed-off-by: Rafael J. 
Wysocki Acked-by: Ingo Molnar Signed-off-by: Len Brown --- arch/ia64/kernel/kprobes.c | 2 +- arch/x86/kernel/kprobes.c | 2 +- kernel/kprobes.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c index f90be51b112..9adac441ac9 100644 --- a/arch/ia64/kernel/kprobes.c +++ b/arch/ia64/kernel/kprobes.c @@ -870,7 +870,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args) return 1; ss_probe: -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ ia64_psr(regs)->ri = p->ainsn.slot; diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 884d985b8b8..e948b28a5a9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) +#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 1b9cbdc0127..7ba8cd9845c 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -123,7 +123,7 @@ static int collect_garbage_slots(void); static int __kprobes check_safety(void) { int ret = 0; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_PM) +#if defined(CONFIG_PREEMPT) && defined(CONFIG_FREEZER) ret = freeze_processes(); if (ret == 0) { struct task_struct *p, *q; -- cgit From c3407710b76610962a5ebb244172631ef9eeb51a Mon Sep 17 00:00:00 2001 From: David Brownell Date: Fri, 9 Jan 2009 12:17:08 -0800 Subject: ACPI: fix ACPI_FADT_S4_RTC_WAKE comment Make the comment for 
ACPI_FADT_S4_RTC_WAKE match the ACPI spec; that bit has nothing to do with status bits. Signed-off-by: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/acpi/actbl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/acpi/actbl.h b/include/acpi/actbl.h index 813e4b6c2c0..bf8d4cfd8cf 100644 --- a/include/acpi/actbl.h +++ b/include/acpi/actbl.h @@ -245,7 +245,7 @@ struct acpi_table_fadt { #define ACPI_FADT_POWER_BUTTON (1<<4) /* 04: Power button is handled as a generic feature */ #define ACPI_FADT_SLEEP_BUTTON (1<<5) /* 05: Sleep button is handled as a generic feature, or not present */ #define ACPI_FADT_FIXED_RTC (1<<6) /* 06: RTC wakeup stat not in fixed register space */ -#define ACPI_FADT_S4_RTC_WAKE (1<<7) /* 07: RTC wakeup stat not possible from S4 */ +#define ACPI_FADT_S4_RTC_WAKE (1<<7) /* 07: RTC wakeup possible from S4 */ #define ACPI_FADT_32BIT_TIMER (1<<8) /* 08: tmr_val is 32 bits 0=24-bits */ #define ACPI_FADT_DOCKING_SUPPORTED (1<<9) /* 09: Docking supported */ #define ACPI_FADT_RESET_REGISTER (1<<10) /* 10: System reset via the FADT RESET_REG supported */ -- cgit From 3af9bfcb433c110839e4c815a9928377f66bbd0e Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Wed, 3 Dec 2008 20:31:11 +0000 Subject: eeepc-laptop: enable Bluetooth ACPI details Although rfkill support for the EEE bluetooth device has been added to 2.6.28-rc the appropriate ACPI accessor definitions were not added, so the support was non functional. The patch below adds the get and set accessors and has been verified to work on an EEE 901. 
Signed-off-by: Jonathan McDowell Acked-by: Matthew Garrett Acked-by: Corentin Chary Signed-off-by: Len Brown --- drivers/platform/x86/eeepc-laptop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c index 02fe2b8b893..9d93cb971e5 100644 --- a/drivers/platform/x86/eeepc-laptop.c +++ b/drivers/platform/x86/eeepc-laptop.c @@ -90,7 +90,7 @@ enum { }; static const char *cm_getv[] = { - "WLDG", NULL, NULL, NULL, + "WLDG", "BTHG", NULL, NULL, "CAMG", NULL, NULL, NULL, NULL, "PBLG", NULL, NULL, "CFVG", NULL, NULL, NULL, @@ -99,7 +99,7 @@ static const char *cm_getv[] = { }; static const char *cm_setv[] = { - "WLDS", NULL, NULL, NULL, + "WLDS", "BTHS", NULL, NULL, "CAMS", NULL, NULL, NULL, "SDSP", "PBLS", "HDPS", NULL, "CFVS", NULL, NULL, NULL, -- cgit From cad73120ab0dfd484682229346de8c16073577e1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 9 Jan 2009 17:23:38 -0500 Subject: dell-laptop: move to drivers/platform/x86/ from drivers/misc/ Signed-off-by: Len Brown --- drivers/misc/Makefile | 1 - drivers/misc/dell-laptop.c | 436 ------------------------------------- drivers/platform/x86/Kconfig | 12 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/dell-laptop.c | 436 +++++++++++++++++++++++++++++++++++++ 5 files changed, 449 insertions(+), 437 deletions(-) delete mode 100644 drivers/misc/dell-laptop.c create mode 100644 drivers/platform/x86/dell-laptop.c diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile index 9cf8ae6e4b3..d5749a7bc77 100644 --- a/drivers/misc/Makefile +++ b/drivers/misc/Makefile @@ -10,7 +10,6 @@ obj-$(CONFIG_ATMEL_TCLIB) += atmel_tclib.o obj-$(CONFIG_ICS932S401) += ics932s401.o obj-$(CONFIG_LKDTM) += lkdtm.o obj-$(CONFIG_TIFM_CORE) += tifm_core.o -obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o obj-$(CONFIG_TIFM_7XX1) += tifm_7xx1.o obj-$(CONFIG_PHANTOM) += phantom.o obj-$(CONFIG_SGI_IOC4) += ioc4.o diff --git a/drivers/misc/dell-laptop.c 
b/drivers/misc/dell-laptop.c deleted file mode 100644 index 4d33a2068b7..00000000000 --- a/drivers/misc/dell-laptop.c +++ /dev/null @@ -1,436 +0,0 @@ -/* - * Driver for Dell laptop extras - * - * Copyright (c) Red Hat - * - * Based on documentation in the libsmbios package, Copyright (C) 2005 Dell - * Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "../firmware/dcdbas.h" - -#define BRIGHTNESS_TOKEN 0x7d - -/* This structure will be modified by the firmware when we enter - * system management mode, hence the volatiles */ - -struct calling_interface_buffer { - u16 class; - u16 select; - volatile u32 input[4]; - volatile u32 output[4]; -} __packed; - -struct calling_interface_token { - u16 tokenID; - u16 location; - union { - u16 value; - u16 stringlength; - }; -}; - -struct calling_interface_structure { - struct dmi_header header; - u16 cmdIOAddress; - u8 cmdIOCode; - u32 supportedCmds; - struct calling_interface_token tokens[]; -} __packed; - -static int da_command_address; -static int da_command_code; -static int da_num_tokens; -static struct calling_interface_token *da_tokens; - -static struct backlight_device *dell_backlight_device; -static struct rfkill *wifi_rfkill; -static struct rfkill *bluetooth_rfkill; -static struct rfkill *wwan_rfkill; - -static const struct dmi_system_id __initdata dell_device_table[] = { - { - .ident = "Dell laptop", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), - DMI_MATCH(DMI_CHASSIS_TYPE, "8"), - }, - }, - { } -}; - -static void parse_da_table(const struct dmi_header *dm) -{ - /* Final token is a terminator, so we don't want to copy it */ - int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1; - struct calling_interface_structure 
*table = - container_of(dm, struct calling_interface_structure, header); - - /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least - 6 bytes of entry */ - - if (dm->length < 17) - return; - - da_command_address = table->cmdIOAddress; - da_command_code = table->cmdIOCode; - - da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) * - sizeof(struct calling_interface_token), - GFP_KERNEL); - - if (!da_tokens) - return; - - memcpy(da_tokens+da_num_tokens, table->tokens, - sizeof(struct calling_interface_token) * tokens); - - da_num_tokens += tokens; -} - -static void find_tokens(const struct dmi_header *dm) -{ - switch (dm->type) { - case 0xd4: /* Indexed IO */ - break; - case 0xd5: /* Protected Area Type 1 */ - break; - case 0xd6: /* Protected Area Type 2 */ - break; - case 0xda: /* Calling interface */ - parse_da_table(dm); - break; - } -} - -static int find_token_location(int tokenid) -{ - int i; - for (i = 0; i < da_num_tokens; i++) { - if (da_tokens[i].tokenID == tokenid) - return da_tokens[i].location; - } - - return -1; -} - -static struct calling_interface_buffer * -dell_send_request(struct calling_interface_buffer *buffer, int class, - int select) -{ - struct smi_cmd command; - - command.magic = SMI_CMD_MAGIC; - command.command_address = da_command_address; - command.command_code = da_command_code; - command.ebx = virt_to_phys(buffer); - command.ecx = 0x42534931; - - buffer->class = class; - buffer->select = select; - - dcdbas_smi_request(&command); - - return buffer; -} - -/* Derived from information in DellWirelessCtl.cpp: - Class 17, select 11 is radio control. It returns an array of 32-bit values. 
- - result[0]: return code - result[1]: - Bit 0: Hardware switch supported - Bit 1: Wifi locator supported - Bit 2: Wifi is supported - Bit 3: Bluetooth is supported - Bit 4: WWAN is supported - Bit 5: Wireless keyboard supported - Bits 6-7: Reserved - Bit 8: Wifi is installed - Bit 9: Bluetooth is installed - Bit 10: WWAN is installed - Bits 11-15: Reserved - Bit 16: Hardware switch is on - Bit 17: Wifi is blocked - Bit 18: Bluetooth is blocked - Bit 19: WWAN is blocked - Bits 20-31: Reserved - result[2]: NVRAM size in bytes - result[3]: NVRAM format version number -*/ - -static int dell_rfkill_set(int radio, enum rfkill_state state) -{ - struct calling_interface_buffer buffer; - int disable = (state == RFKILL_STATE_UNBLOCKED) ? 0 : 1; - - memset(&buffer, 0, sizeof(struct calling_interface_buffer)); - buffer.input[0] = (1 | (radio<<8) | (disable << 16)); - dell_send_request(&buffer, 17, 11); - - return 0; -} - -static int dell_wifi_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(1, state); -} - -static int dell_bluetooth_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(2, state); -} - -static int dell_wwan_set(void *data, enum rfkill_state state) -{ - return dell_rfkill_set(3, state); -} - -static int dell_rfkill_get(int bit, enum rfkill_state *state) -{ - struct calling_interface_buffer buffer; - int status; - int new_state = RFKILL_STATE_HARD_BLOCKED; - - memset(&buffer, 0, sizeof(struct calling_interface_buffer)); - dell_send_request(&buffer, 17, 11); - status = buffer.output[1]; - - if (status & (1<<16)) - new_state = RFKILL_STATE_SOFT_BLOCKED; - - if (status & (1<name = "dell-wifi"; - wifi_rfkill->toggle_radio = dell_wifi_set; - wifi_rfkill->get_state = dell_wifi_get; - ret = rfkill_register(wifi_rfkill); - if (ret) - goto err_wifi; - } - - if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) { - bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH); - if (!bluetooth_rfkill) - goto err_bluetooth; - 
bluetooth_rfkill->name = "dell-bluetooth"; - bluetooth_rfkill->toggle_radio = dell_bluetooth_set; - bluetooth_rfkill->get_state = dell_bluetooth_get; - ret = rfkill_register(bluetooth_rfkill); - if (ret) - goto err_bluetooth; - } - - if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) { - wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN); - if (!wwan_rfkill) - goto err_wwan; - wwan_rfkill->name = "dell-wwan"; - wwan_rfkill->toggle_radio = dell_wwan_set; - wwan_rfkill->get_state = dell_wwan_get; - ret = rfkill_register(wwan_rfkill); - if (ret) - goto err_wwan; - } - - return 0; -err_wwan: - if (wwan_rfkill) - rfkill_free(wwan_rfkill); - if (bluetooth_rfkill) { - rfkill_unregister(bluetooth_rfkill); - bluetooth_rfkill = NULL; - } -err_bluetooth: - if (bluetooth_rfkill) - rfkill_free(bluetooth_rfkill); - if (wifi_rfkill) { - rfkill_unregister(wifi_rfkill); - wifi_rfkill = NULL; - } -err_wifi: - if (wifi_rfkill) - rfkill_free(wifi_rfkill); - - return ret; -} - -static int dell_send_intensity(struct backlight_device *bd) -{ - struct calling_interface_buffer buffer; - - memset(&buffer, 0, sizeof(struct calling_interface_buffer)); - buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); - buffer.input[1] = bd->props.brightness; - - if (buffer.input[0] == -1) - return -ENODEV; - - if (power_supply_is_system_supplied() > 0) - dell_send_request(&buffer, 1, 2); - else - dell_send_request(&buffer, 1, 1); - - return 0; -} - -static int dell_get_intensity(struct backlight_device *bd) -{ - struct calling_interface_buffer buffer; - - memset(&buffer, 0, sizeof(struct calling_interface_buffer)); - buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); - - if (buffer.input[0] == -1) - return -ENODEV; - - if (power_supply_is_system_supplied() > 0) - dell_send_request(&buffer, 0, 2); - else - dell_send_request(&buffer, 0, 1); - - return buffer.output[1]; -} - -static struct backlight_ops dell_ops = { - .get_brightness = dell_get_intensity, - .update_status = dell_send_intensity, -}; - 
-static int __init dell_init(void) -{ - struct calling_interface_buffer buffer; - int max_intensity = 0; - int ret; - - if (!dmi_check_system(dell_device_table)) - return -ENODEV; - - dmi_walk(find_tokens); - - if (!da_tokens) { - printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n"); - return -ENODEV; - } - - ret = dell_setup_rfkill(); - - if (ret) { - printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n"); - goto out; - } - -#ifdef CONFIG_ACPI - /* In the event of an ACPI backlight being available, don't - * register the platform controller. - */ - if (acpi_video_backlight_support()) - return 0; -#endif - - memset(&buffer, 0, sizeof(struct calling_interface_buffer)); - buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); - - if (buffer.input[0] != -1) { - dell_send_request(&buffer, 0, 2); - max_intensity = buffer.output[3]; - } - - if (max_intensity) { - dell_backlight_device = backlight_device_register( - "dell_backlight", - NULL, NULL, - &dell_ops); - - if (IS_ERR(dell_backlight_device)) { - ret = PTR_ERR(dell_backlight_device); - dell_backlight_device = NULL; - goto out; - } - - dell_backlight_device->props.max_brightness = max_intensity; - dell_backlight_device->props.brightness = - dell_get_intensity(dell_backlight_device); - backlight_update_status(dell_backlight_device); - } - - return 0; -out: - if (wifi_rfkill) - rfkill_unregister(wifi_rfkill); - if (bluetooth_rfkill) - rfkill_unregister(bluetooth_rfkill); - if (wwan_rfkill) - rfkill_unregister(wwan_rfkill); - kfree(da_tokens); - return ret; -} - -static void __exit dell_exit(void) -{ - backlight_device_unregister(dell_backlight_device); - if (wifi_rfkill) - rfkill_unregister(wifi_rfkill); - if (bluetooth_rfkill) - rfkill_unregister(bluetooth_rfkill); - if (wwan_rfkill) - rfkill_unregister(wwan_rfkill); -} - -module_init(dell_init); -module_exit(dell_exit); - -MODULE_AUTHOR("Matthew Garrett "); -MODULE_DESCRIPTION("Dell laptop driver"); -MODULE_LICENSE("GPL"); 
-MODULE_ALIAS("dmi:*svnDellInc.:*:ct8:*"); diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index e65448e99b4..9e8f9485f9c 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -54,6 +54,18 @@ config ASUS_LAPTOP If you have an ACPI-compatible ASUS laptop, say Y or M here. +config DELL_LAPTOP + tristate "Dell Laptop Extras (EXPERIMENTAL)" + depends on X86 + depends on DCDBAS + depends on EXPERIMENTAL + depends on BACKLIGHT_CLASS_DEVICE + depends on RFKILL + default n + ---help--- + This driver adds support for rfkill and backlight control to Dell + laptops. + config FUJITSU_LAPTOP tristate "Fujitsu Laptop Extras" depends on ACPI diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index 1e9de2ae0de..e29065120be 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -6,6 +6,7 @@ obj-$(CONFIG_ASUS_LAPTOP) += asus-laptop.o obj-$(CONFIG_EEEPC_LAPTOP) += eeepc-laptop.o obj-$(CONFIG_MSI_LAPTOP) += msi-laptop.o obj-$(CONFIG_COMPAL_LAPTOP) += compal-laptop.o +obj-$(CONFIG_DELL_LAPTOP) += dell-laptop.o obj-$(CONFIG_ACER_WMI) += acer-wmi.o obj-$(CONFIG_HP_WMI) += hp-wmi.o obj-$(CONFIG_TC1100_WMI) += tc1100-wmi.o diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c new file mode 100644 index 00000000000..16e11c2ee19 --- /dev/null +++ b/drivers/platform/x86/dell-laptop.c @@ -0,0 +1,436 @@ +/* + * Driver for Dell laptop extras + * + * Copyright (c) Red Hat + * + * Based on documentation in the libsmbios package, Copyright (C) 2005 Dell + * Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../../firmware/dcdbas.h" + +#define BRIGHTNESS_TOKEN 0x7d + +/* This structure will be modified by the firmware when we enter + * system management mode, hence the volatiles */ + +struct calling_interface_buffer { + u16 class; + u16 select; + volatile u32 input[4]; + volatile u32 output[4]; +} __packed; + +struct calling_interface_token { + u16 tokenID; + u16 location; + union { + u16 value; + u16 stringlength; + }; +}; + +struct calling_interface_structure { + struct dmi_header header; + u16 cmdIOAddress; + u8 cmdIOCode; + u32 supportedCmds; + struct calling_interface_token tokens[]; +} __packed; + +static int da_command_address; +static int da_command_code; +static int da_num_tokens; +static struct calling_interface_token *da_tokens; + +static struct backlight_device *dell_backlight_device; +static struct rfkill *wifi_rfkill; +static struct rfkill *bluetooth_rfkill; +static struct rfkill *wwan_rfkill; + +static const struct dmi_system_id __initdata dell_device_table[] = { + { + .ident = "Dell laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_CHASSIS_TYPE, "8"), + }, + }, + { } +}; + +static void parse_da_table(const struct dmi_header *dm) +{ + /* Final token is a terminator, so we don't want to copy it */ + int tokens = (dm->length-11)/sizeof(struct calling_interface_token)-1; + struct calling_interface_structure *table = + container_of(dm, struct calling_interface_structure, header); + + /* 4 bytes of table header, plus 7 bytes of Dell header, plus at least + 6 bytes of entry */ + + if (dm->length < 17) + return; + + da_command_address = table->cmdIOAddress; + da_command_code = table->cmdIOCode; + + da_tokens = krealloc(da_tokens, (da_num_tokens + tokens) * + sizeof(struct calling_interface_token), + GFP_KERNEL); + + if (!da_tokens) + return; + + memcpy(da_tokens+da_num_tokens, table->tokens, + 
sizeof(struct calling_interface_token) * tokens); + + da_num_tokens += tokens; +} + +static void find_tokens(const struct dmi_header *dm) +{ + switch (dm->type) { + case 0xd4: /* Indexed IO */ + break; + case 0xd5: /* Protected Area Type 1 */ + break; + case 0xd6: /* Protected Area Type 2 */ + break; + case 0xda: /* Calling interface */ + parse_da_table(dm); + break; + } +} + +static int find_token_location(int tokenid) +{ + int i; + for (i = 0; i < da_num_tokens; i++) { + if (da_tokens[i].tokenID == tokenid) + return da_tokens[i].location; + } + + return -1; +} + +static struct calling_interface_buffer * +dell_send_request(struct calling_interface_buffer *buffer, int class, + int select) +{ + struct smi_cmd command; + + command.magic = SMI_CMD_MAGIC; + command.command_address = da_command_address; + command.command_code = da_command_code; + command.ebx = virt_to_phys(buffer); + command.ecx = 0x42534931; + + buffer->class = class; + buffer->select = select; + + dcdbas_smi_request(&command); + + return buffer; +} + +/* Derived from information in DellWirelessCtl.cpp: + Class 17, select 11 is radio control. It returns an array of 32-bit values. + + result[0]: return code + result[1]: + Bit 0: Hardware switch supported + Bit 1: Wifi locator supported + Bit 2: Wifi is supported + Bit 3: Bluetooth is supported + Bit 4: WWAN is supported + Bit 5: Wireless keyboard supported + Bits 6-7: Reserved + Bit 8: Wifi is installed + Bit 9: Bluetooth is installed + Bit 10: WWAN is installed + Bits 11-15: Reserved + Bit 16: Hardware switch is on + Bit 17: Wifi is blocked + Bit 18: Bluetooth is blocked + Bit 19: WWAN is blocked + Bits 20-31: Reserved + result[2]: NVRAM size in bytes + result[3]: NVRAM format version number +*/ + +static int dell_rfkill_set(int radio, enum rfkill_state state) +{ + struct calling_interface_buffer buffer; + int disable = (state == RFKILL_STATE_UNBLOCKED) ? 
0 : 1; + + memset(&buffer, 0, sizeof(struct calling_interface_buffer)); + buffer.input[0] = (1 | (radio<<8) | (disable << 16)); + dell_send_request(&buffer, 17, 11); + + return 0; +} + +static int dell_wifi_set(void *data, enum rfkill_state state) +{ + return dell_rfkill_set(1, state); +} + +static int dell_bluetooth_set(void *data, enum rfkill_state state) +{ + return dell_rfkill_set(2, state); +} + +static int dell_wwan_set(void *data, enum rfkill_state state) +{ + return dell_rfkill_set(3, state); +} + +static int dell_rfkill_get(int bit, enum rfkill_state *state) +{ + struct calling_interface_buffer buffer; + int status; + int new_state = RFKILL_STATE_HARD_BLOCKED; + + memset(&buffer, 0, sizeof(struct calling_interface_buffer)); + dell_send_request(&buffer, 17, 11); + status = buffer.output[1]; + + if (status & (1<<16)) + new_state = RFKILL_STATE_SOFT_BLOCKED; + + if (status & (1<name = "dell-wifi"; + wifi_rfkill->toggle_radio = dell_wifi_set; + wifi_rfkill->get_state = dell_wifi_get; + ret = rfkill_register(wifi_rfkill); + if (ret) + goto err_wifi; + } + + if ((status & (1<<3|1<<9)) == (1<<3|1<<9)) { + bluetooth_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_BLUETOOTH); + if (!bluetooth_rfkill) + goto err_bluetooth; + bluetooth_rfkill->name = "dell-bluetooth"; + bluetooth_rfkill->toggle_radio = dell_bluetooth_set; + bluetooth_rfkill->get_state = dell_bluetooth_get; + ret = rfkill_register(bluetooth_rfkill); + if (ret) + goto err_bluetooth; + } + + if ((status & (1<<4|1<<10)) == (1<<4|1<<10)) { + wwan_rfkill = rfkill_allocate(NULL, RFKILL_TYPE_WWAN); + if (!wwan_rfkill) + goto err_wwan; + wwan_rfkill->name = "dell-wwan"; + wwan_rfkill->toggle_radio = dell_wwan_set; + wwan_rfkill->get_state = dell_wwan_get; + ret = rfkill_register(wwan_rfkill); + if (ret) + goto err_wwan; + } + + return 0; +err_wwan: + if (wwan_rfkill) + rfkill_free(wwan_rfkill); + if (bluetooth_rfkill) { + rfkill_unregister(bluetooth_rfkill); + bluetooth_rfkill = NULL; + } +err_bluetooth: + if 
(bluetooth_rfkill) + rfkill_free(bluetooth_rfkill); + if (wifi_rfkill) { + rfkill_unregister(wifi_rfkill); + wifi_rfkill = NULL; + } +err_wifi: + if (wifi_rfkill) + rfkill_free(wifi_rfkill); + + return ret; +} + +static int dell_send_intensity(struct backlight_device *bd) +{ + struct calling_interface_buffer buffer; + + memset(&buffer, 0, sizeof(struct calling_interface_buffer)); + buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); + buffer.input[1] = bd->props.brightness; + + if (buffer.input[0] == -1) + return -ENODEV; + + if (power_supply_is_system_supplied() > 0) + dell_send_request(&buffer, 1, 2); + else + dell_send_request(&buffer, 1, 1); + + return 0; +} + +static int dell_get_intensity(struct backlight_device *bd) +{ + struct calling_interface_buffer buffer; + + memset(&buffer, 0, sizeof(struct calling_interface_buffer)); + buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); + + if (buffer.input[0] == -1) + return -ENODEV; + + if (power_supply_is_system_supplied() > 0) + dell_send_request(&buffer, 0, 2); + else + dell_send_request(&buffer, 0, 1); + + return buffer.output[1]; +} + +static struct backlight_ops dell_ops = { + .get_brightness = dell_get_intensity, + .update_status = dell_send_intensity, +}; + +static int __init dell_init(void) +{ + struct calling_interface_buffer buffer; + int max_intensity = 0; + int ret; + + if (!dmi_check_system(dell_device_table)) + return -ENODEV; + + dmi_walk(find_tokens); + + if (!da_tokens) { + printk(KERN_INFO "dell-laptop: Unable to find dmi tokens\n"); + return -ENODEV; + } + + ret = dell_setup_rfkill(); + + if (ret) { + printk(KERN_WARNING "dell-laptop: Unable to setup rfkill\n"); + goto out; + } + +#ifdef CONFIG_ACPI + /* In the event of an ACPI backlight being available, don't + * register the platform controller. 
+ */ + if (acpi_video_backlight_support()) + return 0; +#endif + + memset(&buffer, 0, sizeof(struct calling_interface_buffer)); + buffer.input[0] = find_token_location(BRIGHTNESS_TOKEN); + + if (buffer.input[0] != -1) { + dell_send_request(&buffer, 0, 2); + max_intensity = buffer.output[3]; + } + + if (max_intensity) { + dell_backlight_device = backlight_device_register( + "dell_backlight", + NULL, NULL, + &dell_ops); + + if (IS_ERR(dell_backlight_device)) { + ret = PTR_ERR(dell_backlight_device); + dell_backlight_device = NULL; + goto out; + } + + dell_backlight_device->props.max_brightness = max_intensity; + dell_backlight_device->props.brightness = + dell_get_intensity(dell_backlight_device); + backlight_update_status(dell_backlight_device); + } + + return 0; +out: + if (wifi_rfkill) + rfkill_unregister(wifi_rfkill); + if (bluetooth_rfkill) + rfkill_unregister(bluetooth_rfkill); + if (wwan_rfkill) + rfkill_unregister(wwan_rfkill); + kfree(da_tokens); + return ret; +} + +static void __exit dell_exit(void) +{ + backlight_device_unregister(dell_backlight_device); + if (wifi_rfkill) + rfkill_unregister(wifi_rfkill); + if (bluetooth_rfkill) + rfkill_unregister(bluetooth_rfkill); + if (wwan_rfkill) + rfkill_unregister(wwan_rfkill); +} + +module_init(dell_init); +module_exit(dell_exit); + +MODULE_AUTHOR("Matthew Garrett "); +MODULE_DESCRIPTION("Dell laptop driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("dmi:*svnDellInc.:*:ct8:*"); -- cgit From d08ca2ca743f324eceba59e93188f9439e966bce Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 16 Jan 2009 13:52:03 -0500 Subject: ACPI: rename main.c to sleep.c Signed-off-by: Len Brown --- drivers/acpi/Makefile | 2 +- drivers/acpi/main.c | 747 -------------------------------------------------- drivers/acpi/sleep.c | 747 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 748 insertions(+), 748 deletions(-) delete mode 100644 drivers/acpi/main.c create mode 100644 drivers/acpi/sleep.c diff --git 
a/drivers/acpi/Makefile b/drivers/acpi/Makefile index d80f4cc2e0d..65d90c720b5 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -19,7 +19,7 @@ obj-y += osl.o utils.o reboot.o\ # sleep related files obj-y += wakeup.o -obj-y += main.o +obj-y += sleep.o obj-$(CONFIG_ACPI_SLEEP) += proc.o diff --git a/drivers/acpi/main.c b/drivers/acpi/main.c deleted file mode 100644 index 7e3c609cbef..00000000000 --- a/drivers/acpi/main.c +++ /dev/null @@ -1,747 +0,0 @@ -/* - * sleep.c - ACPI sleep support. - * - * Copyright (c) 2005 Alexey Starikovskiy - * Copyright (c) 2004 David Shaohua Li - * Copyright (c) 2000-2003 Patrick Mochel - * Copyright (c) 2003 Open Source Development Lab - * - * This file is released under the GPLv2. - * - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include -#include -#include "sleep.h" - -u8 sleep_states[ACPI_S_STATE_COUNT]; - -static void acpi_sleep_tts_switch(u32 acpi_state) -{ - union acpi_object in_arg = { ACPI_TYPE_INTEGER }; - struct acpi_object_list arg_list = { 1, &in_arg }; - acpi_status status = AE_OK; - - in_arg.integer.value = acpi_state; - status = acpi_evaluate_object(NULL, "\\_TTS", &arg_list, NULL); - if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { - /* - * OS can't evaluate the _TTS object correctly. Some warning - * message will be printed. But it won't break anything. - */ - printk(KERN_NOTICE "Failure in evaluating _TTS object\n"); - } -} - -static int tts_notify_reboot(struct notifier_block *this, - unsigned long code, void *x) -{ - acpi_sleep_tts_switch(ACPI_STATE_S5); - return NOTIFY_DONE; -} - -static struct notifier_block tts_notifier = { - .notifier_call = tts_notify_reboot, - .next = NULL, - .priority = 0, -}; - -static int acpi_sleep_prepare(u32 acpi_state) -{ -#ifdef CONFIG_ACPI_SLEEP - /* do we have a wakeup address for S2 and S3? 
*/ - if (acpi_state == ACPI_STATE_S3) { - if (!acpi_wakeup_address) { - return -EFAULT; - } - acpi_set_firmware_waking_vector( - (acpi_physical_address)acpi_wakeup_address); - - } - ACPI_FLUSH_CPU_CACHE(); - acpi_enable_wakeup_device_prep(acpi_state); -#endif - printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n", - acpi_state); - acpi_enter_sleep_state_prep(acpi_state); - return 0; -} - -#ifdef CONFIG_ACPI_SLEEP -static u32 acpi_target_sleep_state = ACPI_STATE_S0; -/* - * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the - * user to request that behavior by using the 'acpi_old_suspend_ordering' - * kernel command line option that causes the following variable to be set. - */ -static bool old_suspend_ordering; - -void __init acpi_old_suspend_ordering(void) -{ - old_suspend_ordering = true; -} - -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; -/* - * The ACPI specification wants us to save NVS memory regions during hibernation - * and to restore them during the subsequent resume. However, it is not certain - * if this mechanism is going to work on all machines, so we allow the user to - * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line - * option. - */ -static bool s4_no_nvs; - -void __init acpi_s4_no_nvs(void) -{ - s4_no_nvs = true; -} - -/** - * acpi_pm_disable_gpes - Disable the GPEs. 
- */ -static int acpi_pm_disable_gpes(void) -{ - acpi_disable_all_gpes(); - return 0; -} - -/** - * __acpi_pm_prepare - Prepare the platform to enter the target state. - * - * If necessary, set the firmware waking vector and do arch-specific - * nastiness to get the wakeup code to the waking vector. - */ -static int __acpi_pm_prepare(void) -{ - int error = acpi_sleep_prepare(acpi_target_sleep_state); - - if (error) - acpi_target_sleep_state = ACPI_STATE_S0; - return error; -} - -/** - * acpi_pm_prepare - Prepare the platform to enter the target sleep - * state and disable the GPEs. - */ -static int acpi_pm_prepare(void) -{ - int error = __acpi_pm_prepare(); - - if (!error) - acpi_disable_all_gpes(); - return error; -} - -/** - * acpi_pm_finish - Instruct the platform to leave a sleep state. - * - * This is called after we wake back up (or if entering the sleep state - * failed). - */ -static void acpi_pm_finish(void) -{ - u32 acpi_state = acpi_target_sleep_state; - - if (acpi_state == ACPI_STATE_S0) - return; - - printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n", - acpi_state); - acpi_disable_wakeup_device(acpi_state); - acpi_leave_sleep_state(acpi_state); - - /* reset firmware waking vector */ - acpi_set_firmware_waking_vector((acpi_physical_address) 0); - - acpi_target_sleep_state = ACPI_STATE_S0; -} - -/** - * acpi_pm_end - Finish up suspend sequence. - */ -static void acpi_pm_end(void) -{ - /* - * This is necessary in case acpi_pm_finish() is not called during a - * failing transition to a sleep state. 
- */ - acpi_target_sleep_state = ACPI_STATE_S0; - acpi_sleep_tts_switch(acpi_target_sleep_state); -} -#else /* !CONFIG_ACPI_SLEEP */ -#define acpi_target_sleep_state ACPI_STATE_S0 -#endif /* CONFIG_ACPI_SLEEP */ - -#ifdef CONFIG_SUSPEND -extern void do_suspend_lowlevel(void); - -static u32 acpi_suspend_states[] = { - [PM_SUSPEND_ON] = ACPI_STATE_S0, - [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, - [PM_SUSPEND_MEM] = ACPI_STATE_S3, - [PM_SUSPEND_MAX] = ACPI_STATE_S5 -}; - -/** - * acpi_suspend_begin - Set the target system sleep state to the state - * associated with given @pm_state, if supported. - */ -static int acpi_suspend_begin(suspend_state_t pm_state) -{ - u32 acpi_state = acpi_suspend_states[pm_state]; - int error = 0; - - if (sleep_states[acpi_state]) { - acpi_target_sleep_state = acpi_state; - acpi_sleep_tts_switch(acpi_target_sleep_state); - } else { - printk(KERN_ERR "ACPI does not support this state: %d\n", - pm_state); - error = -ENOSYS; - } - return error; -} - -/** - * acpi_suspend_enter - Actually enter a sleep state. - * @pm_state: ignored - * - * Flush caches and go to sleep. For STR we have to call arch-specific - * assembly, which in turn call acpi_enter_sleep_state(). - * It's unfortunate, but it works. Please fix if you're feeling frisky. - */ -static int acpi_suspend_enter(suspend_state_t pm_state) -{ - acpi_status status = AE_OK; - unsigned long flags = 0; - u32 acpi_state = acpi_target_sleep_state; - - ACPI_FLUSH_CPU_CACHE(); - - /* Do arch specific saving of state. */ - if (acpi_state == ACPI_STATE_S3) { - int error = acpi_save_state_mem(); - - if (error) - return error; - } - - local_irq_save(flags); - acpi_enable_wakeup_device(acpi_state); - switch (acpi_state) { - case ACPI_STATE_S1: - barrier(); - status = acpi_enter_sleep_state(acpi_state); - break; - - case ACPI_STATE_S3: - do_suspend_lowlevel(); - break; - } - - /* If ACPI is not enabled by the BIOS, we need to enable it here. 
*/ - if (set_sci_en_on_resume) - acpi_set_register(ACPI_BITREG_SCI_ENABLE, 1); - else - acpi_enable(); - - /* Reprogram control registers and execute _BFS */ - acpi_leave_sleep_state_prep(acpi_state); - - /* ACPI 3.0 specs (P62) says that it's the responsibility - * of the OSPM to clear the status bit [ implying that the - * POWER_BUTTON event should not reach userspace ] - */ - if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) - acpi_clear_event(ACPI_EVENT_POWER_BUTTON); - - /* - * Disable and clear GPE status before interrupt is enabled. Some GPEs - * (like wakeup GPE) haven't handler, this can avoid such GPE misfire. - * acpi_leave_sleep_state will reenable specific GPEs later - */ - acpi_disable_all_gpes(); - - local_irq_restore(flags); - printk(KERN_DEBUG "Back to C!\n"); - - /* restore processor state */ - if (acpi_state == ACPI_STATE_S3) - acpi_restore_state_mem(); - - return ACPI_SUCCESS(status) ? 0 : -EFAULT; -} - -static int acpi_suspend_state_valid(suspend_state_t pm_state) -{ - u32 acpi_state; - - switch (pm_state) { - case PM_SUSPEND_ON: - case PM_SUSPEND_STANDBY: - case PM_SUSPEND_MEM: - acpi_state = acpi_suspend_states[pm_state]; - - return sleep_states[acpi_state]; - default: - return 0; - } -} - -static struct platform_suspend_ops acpi_suspend_ops = { - .valid = acpi_suspend_state_valid, - .begin = acpi_suspend_begin, - .prepare = acpi_pm_prepare, - .enter = acpi_suspend_enter, - .finish = acpi_pm_finish, - .end = acpi_pm_end, -}; - -/** - * acpi_suspend_begin_old - Set the target system sleep state to the - * state associated with given @pm_state, if supported, and - * execute the _PTS control method. This function is used if the - * pre-ACPI 2.0 suspend ordering has been requested. 
- */ -static int acpi_suspend_begin_old(suspend_state_t pm_state) -{ - int error = acpi_suspend_begin(pm_state); - - if (!error) - error = __acpi_pm_prepare(); - return error; -} - -/* - * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has - * been requested. - */ -static struct platform_suspend_ops acpi_suspend_ops_old = { - .valid = acpi_suspend_state_valid, - .begin = acpi_suspend_begin_old, - .prepare = acpi_pm_disable_gpes, - .enter = acpi_suspend_enter, - .finish = acpi_pm_finish, - .end = acpi_pm_end, - .recover = acpi_pm_finish, -}; - -static int __init init_old_suspend_ordering(const struct dmi_system_id *d) -{ - old_suspend_ordering = true; - return 0; -} - -static int __init init_set_sci_en_on_resume(const struct dmi_system_id *d) -{ - set_sci_en_on_resume = true; - return 0; -} - -static struct dmi_system_id __initdata acpisleep_dmi_table[] = { - { - .callback = init_old_suspend_ordering, - .ident = "Abit KN9 (nForce4 variant)", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "http://www.abit.com.tw/"), - DMI_MATCH(DMI_BOARD_NAME, "KN9 Series(NF-CK804)"), - }, - }, - { - .callback = init_old_suspend_ordering, - .ident = "HP xw4600 Workstation", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), - DMI_MATCH(DMI_PRODUCT_NAME, "HP xw4600 Workstation"), - }, - }, - { - .callback = init_set_sci_en_on_resume, - .ident = "Apple MacBook 1,1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "MacBook1,1"), - }, - }, - { - .callback = init_set_sci_en_on_resume, - .ident = "Apple MacMini 1,1", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."), - DMI_MATCH(DMI_PRODUCT_NAME, "Macmini1,1"), - }, - }, - {}, -}; -#endif /* CONFIG_SUSPEND */ - -#ifdef CONFIG_HIBERNATION -static unsigned long s4_hardware_signature; -static struct acpi_table_facs *facs; -static bool nosigcheck; - -void __init acpi_no_s4_hw_signature(void) -{ - nosigcheck = true; -} - -static int 
acpi_hibernation_begin(void) -{ - int error; - - error = s4_no_nvs ? 0 : hibernate_nvs_alloc(); - if (!error) { - acpi_target_sleep_state = ACPI_STATE_S4; - acpi_sleep_tts_switch(acpi_target_sleep_state); - } - - return error; -} - -static int acpi_hibernation_pre_snapshot(void) -{ - int error = acpi_pm_prepare(); - - if (!error) - hibernate_nvs_save(); - - return error; -} - -static int acpi_hibernation_enter(void) -{ - acpi_status status = AE_OK; - unsigned long flags = 0; - - ACPI_FLUSH_CPU_CACHE(); - - local_irq_save(flags); - acpi_enable_wakeup_device(ACPI_STATE_S4); - /* This shouldn't return. If it returns, we have a problem */ - status = acpi_enter_sleep_state(ACPI_STATE_S4); - /* Reprogram control registers and execute _BFS */ - acpi_leave_sleep_state_prep(ACPI_STATE_S4); - local_irq_restore(flags); - - return ACPI_SUCCESS(status) ? 0 : -EFAULT; -} - -static void acpi_hibernation_finish(void) -{ - hibernate_nvs_free(); - acpi_pm_finish(); -} - -static void acpi_hibernation_leave(void) -{ - /* - * If ACPI is not enabled by the BIOS and the boot kernel, we need to - * enable it here. 
- */ - acpi_enable(); - /* Reprogram control registers and execute _BFS */ - acpi_leave_sleep_state_prep(ACPI_STATE_S4); - /* Check the hardware signature */ - if (facs && s4_hardware_signature != facs->hardware_signature) { - printk(KERN_EMERG "ACPI: Hardware changed while hibernated, " - "cannot resume!\n"); - panic("ACPI S4 hardware signature mismatch"); - } - /* Restore the NVS memory area */ - hibernate_nvs_restore(); -} - -static void acpi_pm_enable_gpes(void) -{ - acpi_enable_all_runtime_gpes(); -} - -static struct platform_hibernation_ops acpi_hibernation_ops = { - .begin = acpi_hibernation_begin, - .end = acpi_pm_end, - .pre_snapshot = acpi_hibernation_pre_snapshot, - .finish = acpi_hibernation_finish, - .prepare = acpi_pm_prepare, - .enter = acpi_hibernation_enter, - .leave = acpi_hibernation_leave, - .pre_restore = acpi_pm_disable_gpes, - .restore_cleanup = acpi_pm_enable_gpes, -}; - -/** - * acpi_hibernation_begin_old - Set the target system sleep state to - * ACPI_STATE_S4 and execute the _PTS control method. This - * function is used if the pre-ACPI 2.0 suspend ordering has been - * requested. - */ -static int acpi_hibernation_begin_old(void) -{ - int error; - /* - * The _TTS object should always be evaluated before the _PTS object. - * When the old_suspended_ordering is true, the _PTS object is - * evaluated in the acpi_sleep_prepare. - */ - acpi_sleep_tts_switch(ACPI_STATE_S4); - - error = acpi_sleep_prepare(ACPI_STATE_S4); - - if (!error) { - if (!s4_no_nvs) - error = hibernate_nvs_alloc(); - if (!error) - acpi_target_sleep_state = ACPI_STATE_S4; - } - return error; -} - -static int acpi_hibernation_pre_snapshot_old(void) -{ - int error = acpi_pm_disable_gpes(); - - if (!error) - hibernate_nvs_save(); - - return error; -} - -/* - * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has - * been requested. 
- */ -static struct platform_hibernation_ops acpi_hibernation_ops_old = { - .begin = acpi_hibernation_begin_old, - .end = acpi_pm_end, - .pre_snapshot = acpi_hibernation_pre_snapshot_old, - .finish = acpi_hibernation_finish, - .prepare = acpi_pm_disable_gpes, - .enter = acpi_hibernation_enter, - .leave = acpi_hibernation_leave, - .pre_restore = acpi_pm_disable_gpes, - .restore_cleanup = acpi_pm_enable_gpes, - .recover = acpi_pm_finish, -}; -#endif /* CONFIG_HIBERNATION */ - -int acpi_suspend(u32 acpi_state) -{ - suspend_state_t states[] = { - [1] = PM_SUSPEND_STANDBY, - [3] = PM_SUSPEND_MEM, - [5] = PM_SUSPEND_MAX - }; - - if (acpi_state < 6 && states[acpi_state]) - return pm_suspend(states[acpi_state]); - if (acpi_state == 4) - return hibernate(); - return -EINVAL; -} - -#ifdef CONFIG_PM_SLEEP -/** - * acpi_pm_device_sleep_state - return preferred power state of ACPI device - * in the system sleep state given by %acpi_target_sleep_state - * @dev: device to examine; its driver model wakeup flags control - * whether it should be able to wake up the system - * @d_min_p: used to store the upper limit of allowed states range - * Return value: preferred power state of the device on success, -ENODEV on - * failure (ie. if there's no 'struct acpi_device' for @dev) - * - * Find the lowest power (highest number) ACPI device power state that - * device @dev can be in while the system is in the sleep state represented - * by %acpi_target_sleep_state. If @wake is nonzero, the device should be - * able to wake up the system from this sleep state. If @d_min_p is set, - * the highest power (lowest number) device power state of @dev allowed - * in this system sleep state is stored at the location pointed to by it. - * - * The caller must ensure that @dev is valid before using this function. - * The caller is also responsible for figuring out if the device is - * supposed to be able to wake up the system and passing this information - * via @wake. 
- */ - -int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) -{ - acpi_handle handle = DEVICE_ACPI_HANDLE(dev); - struct acpi_device *adev; - char acpi_method[] = "_SxD"; - unsigned long long d_min, d_max; - - if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { - printk(KERN_DEBUG "ACPI handle has no context!\n"); - return -ENODEV; - } - - acpi_method[2] = '0' + acpi_target_sleep_state; - /* - * If the sleep state is S0, we will return D3, but if the device has - * _S0W, we will use the value from _S0W - */ - d_min = ACPI_STATE_D0; - d_max = ACPI_STATE_D3; - - /* - * If present, _SxD methods return the minimum D-state (highest power - * state) we can use for the corresponding S-states. Otherwise, the - * minimum D-state is D0 (ACPI 3.x). - * - * NOTE: We rely on acpi_evaluate_integer() not clobbering the integer - * provided -- that's our fault recovery, we ignore retval. - */ - if (acpi_target_sleep_state > ACPI_STATE_S0) - acpi_evaluate_integer(handle, acpi_method, NULL, &d_min); - - /* - * If _PRW says we can wake up the system from the target sleep state, - * the D-state returned by _SxD is sufficient for that (we assume a - * wakeup-aware driver if wake is set). Still, if _SxW exists - * (ACPI 3.x), it should return the maximum (lowest power) D-state that - * can wake the system. _S0W may be valid, too. 
- */ - if (acpi_target_sleep_state == ACPI_STATE_S0 || - (device_may_wakeup(dev) && adev->wakeup.state.enabled && - adev->wakeup.sleep_state <= acpi_target_sleep_state)) { - acpi_status status; - - acpi_method[3] = 'W'; - status = acpi_evaluate_integer(handle, acpi_method, NULL, - &d_max); - if (ACPI_FAILURE(status)) { - d_max = d_min; - } else if (d_max < d_min) { - /* Warn the user of the broken DSDT */ - printk(KERN_WARNING "ACPI: Wrong value from %s\n", - acpi_method); - /* Sanitize it */ - d_min = d_max; - } - } - - if (d_min_p) - *d_min_p = d_min; - return d_max; -} - -/** - * acpi_pm_device_sleep_wake - enable or disable the system wake-up - * capability of given device - * @dev: device to handle - * @enable: 'true' - enable, 'false' - disable the wake-up capability - */ -int acpi_pm_device_sleep_wake(struct device *dev, bool enable) -{ - acpi_handle handle; - struct acpi_device *adev; - - if (!device_may_wakeup(dev)) - return -EINVAL; - - handle = DEVICE_ACPI_HANDLE(dev); - if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { - printk(KERN_DEBUG "ACPI handle has no context!\n"); - return -ENODEV; - } - - return enable ? 
- acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state) : - acpi_disable_wakeup_device_power(adev); -} -#endif - -static void acpi_power_off_prepare(void) -{ - /* Prepare to power off the system */ - acpi_sleep_prepare(ACPI_STATE_S5); - acpi_disable_all_gpes(); -} - -static void acpi_power_off(void) -{ - /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ - printk("%s called\n", __func__); - local_irq_disable(); - acpi_enable_wakeup_device(ACPI_STATE_S5); - acpi_enter_sleep_state(ACPI_STATE_S5); -} - -int __init acpi_sleep_init(void) -{ - acpi_status status; - u8 type_a, type_b; -#ifdef CONFIG_SUSPEND - int i = 0; - - dmi_check_system(acpisleep_dmi_table); -#endif - - if (acpi_disabled) - return 0; - - sleep_states[ACPI_STATE_S0] = 1; - printk(KERN_INFO PREFIX "(supports S0"); - -#ifdef CONFIG_SUSPEND - for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) { - status = acpi_get_sleep_type_data(i, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { - sleep_states[i] = 1; - printk(" S%d", i); - } - } - - suspend_set_ops(old_suspend_ordering ? - &acpi_suspend_ops_old : &acpi_suspend_ops); -#endif - -#ifdef CONFIG_HIBERNATION - status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { - hibernation_set_ops(old_suspend_ordering ? - &acpi_hibernation_ops_old : &acpi_hibernation_ops); - sleep_states[ACPI_STATE_S4] = 1; - printk(" S4"); - if (!nosigcheck) { - acpi_get_table(ACPI_SIG_FACS, 1, - (struct acpi_table_header **)&facs); - if (facs) - s4_hardware_signature = - facs->hardware_signature; - } - } -#endif - status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); - if (ACPI_SUCCESS(status)) { - sleep_states[ACPI_STATE_S5] = 1; - printk(" S5"); - pm_power_off_prepare = acpi_power_off_prepare; - pm_power_off = acpi_power_off; - } - printk(")\n"); - /* - * Register the tts_notifier to reboot notifier list so that the _TTS - * object can also be evaluated when the system enters S5. 
- */ - register_reboot_notifier(&tts_notifier); - return 0; -} diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c new file mode 100644 index 00000000000..7e3c609cbef --- /dev/null +++ b/drivers/acpi/sleep.c @@ -0,0 +1,747 @@ +/* + * sleep.c - ACPI sleep support. + * + * Copyright (c) 2005 Alexey Starikovskiy + * Copyright (c) 2004 David Shaohua Li + * Copyright (c) 2000-2003 Patrick Mochel + * Copyright (c) 2003 Open Source Development Lab + * + * This file is released under the GPLv2. + * + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include "sleep.h" + +u8 sleep_states[ACPI_S_STATE_COUNT]; + +static void acpi_sleep_tts_switch(u32 acpi_state) +{ + union acpi_object in_arg = { ACPI_TYPE_INTEGER }; + struct acpi_object_list arg_list = { 1, &in_arg }; + acpi_status status = AE_OK; + + in_arg.integer.value = acpi_state; + status = acpi_evaluate_object(NULL, "\\_TTS", &arg_list, NULL); + if (ACPI_FAILURE(status) && status != AE_NOT_FOUND) { + /* + * OS can't evaluate the _TTS object correctly. Some warning + * message will be printed. But it won't break anything. + */ + printk(KERN_NOTICE "Failure in evaluating _TTS object\n"); + } +} + +static int tts_notify_reboot(struct notifier_block *this, + unsigned long code, void *x) +{ + acpi_sleep_tts_switch(ACPI_STATE_S5); + return NOTIFY_DONE; +} + +static struct notifier_block tts_notifier = { + .notifier_call = tts_notify_reboot, + .next = NULL, + .priority = 0, +}; + +static int acpi_sleep_prepare(u32 acpi_state) +{ +#ifdef CONFIG_ACPI_SLEEP + /* do we have a wakeup address for S2 and S3? 
*/ + if (acpi_state == ACPI_STATE_S3) { + if (!acpi_wakeup_address) { + return -EFAULT; + } + acpi_set_firmware_waking_vector( + (acpi_physical_address)acpi_wakeup_address); + + } + ACPI_FLUSH_CPU_CACHE(); + acpi_enable_wakeup_device_prep(acpi_state); +#endif + printk(KERN_INFO PREFIX "Preparing to enter system sleep state S%d\n", + acpi_state); + acpi_enter_sleep_state_prep(acpi_state); + return 0; +} + +#ifdef CONFIG_ACPI_SLEEP +static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the + * user to request that behavior by using the 'acpi_old_suspend_ordering' + * kernel command line option that causes the following variable to be set. + */ +static bool old_suspend_ordering; + +void __init acpi_old_suspend_ordering(void) +{ + old_suspend_ordering = true; +} + +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * and to restore them during the subsequent resume. However, it is not certain + * if this mechanism is going to work on all machines, so we allow the user to + * disable this mechanism using the 'acpi_sleep=s4_nonvs' kernel command line + * option. + */ +static bool s4_no_nvs; + +void __init acpi_s4_no_nvs(void) +{ + s4_no_nvs = true; +} + +/** + * acpi_pm_disable_gpes - Disable the GPEs. 
+ */ +static int acpi_pm_disable_gpes(void) +{ + acpi_disable_all_gpes(); + return 0; +} + +/** + * __acpi_pm_prepare - Prepare the platform to enter the target state. + * + * If necessary, set the firmware waking vector and do arch-specific + * nastiness to get the wakeup code to the waking vector. + */ +static int __acpi_pm_prepare(void) +{ + int error = acpi_sleep_prepare(acpi_target_sleep_state); + + if (error) + acpi_target_sleep_state = ACPI_STATE_S0; + return error; +} + +/** + * acpi_pm_prepare - Prepare the platform to enter the target sleep + * state and disable the GPEs. + */ +static int acpi_pm_prepare(void) +{ + int error = __acpi_pm_prepare(); + + if (!error) + acpi_disable_all_gpes(); + return error; +} + +/** + * acpi_pm_finish - Instruct the platform to leave a sleep state. + * + * This is called after we wake back up (or if entering the sleep state + * failed). + */ +static void acpi_pm_finish(void) +{ + u32 acpi_state = acpi_target_sleep_state; + + if (acpi_state == ACPI_STATE_S0) + return; + + printk(KERN_INFO PREFIX "Waking up from system sleep state S%d\n", + acpi_state); + acpi_disable_wakeup_device(acpi_state); + acpi_leave_sleep_state(acpi_state); + + /* reset firmware waking vector */ + acpi_set_firmware_waking_vector((acpi_physical_address) 0); + + acpi_target_sleep_state = ACPI_STATE_S0; +} + +/** + * acpi_pm_end - Finish up suspend sequence. + */ +static void acpi_pm_end(void) +{ + /* + * This is necessary in case acpi_pm_finish() is not called during a + * failing transition to a sleep state. 
+ */ + acpi_target_sleep_state = ACPI_STATE_S0; + acpi_sleep_tts_switch(acpi_target_sleep_state); +} +#else /* !CONFIG_ACPI_SLEEP */ +#define acpi_target_sleep_state ACPI_STATE_S0 +#endif /* CONFIG_ACPI_SLEEP */ + +#ifdef CONFIG_SUSPEND +extern void do_suspend_lowlevel(void); + +static u32 acpi_suspend_states[] = { + [PM_SUSPEND_ON] = ACPI_STATE_S0, + [PM_SUSPEND_STANDBY] = ACPI_STATE_S1, + [PM_SUSPEND_MEM] = ACPI_STATE_S3, + [PM_SUSPEND_MAX] = ACPI_STATE_S5 +}; + +/** + * acpi_suspend_begin - Set the target system sleep state to the state + * associated with given @pm_state, if supported. + */ +static int acpi_suspend_begin(suspend_state_t pm_state) +{ + u32 acpi_state = acpi_suspend_states[pm_state]; + int error = 0; + + if (sleep_states[acpi_state]) { + acpi_target_sleep_state = acpi_state; + acpi_sleep_tts_switch(acpi_target_sleep_state); + } else { + printk(KERN_ERR "ACPI does not support this state: %d\n", + pm_state); + error = -ENOSYS; + } + return error; +} + +/** + * acpi_suspend_enter - Actually enter a sleep state. + * @pm_state: ignored + * + * Flush caches and go to sleep. For STR we have to call arch-specific + * assembly, which in turn call acpi_enter_sleep_state(). + * It's unfortunate, but it works. Please fix if you're feeling frisky. + */ +static int acpi_suspend_enter(suspend_state_t pm_state) +{ + acpi_status status = AE_OK; + unsigned long flags = 0; + u32 acpi_state = acpi_target_sleep_state; + + ACPI_FLUSH_CPU_CACHE(); + + /* Do arch specific saving of state. */ + if (acpi_state == ACPI_STATE_S3) { + int error = acpi_save_state_mem(); + + if (error) + return error; + } + + local_irq_save(flags); + acpi_enable_wakeup_device(acpi_state); + switch (acpi_state) { + case ACPI_STATE_S1: + barrier(); + status = acpi_enter_sleep_state(acpi_state); + break; + + case ACPI_STATE_S3: + do_suspend_lowlevel(); + break; + } + + /* If ACPI is not enabled by the BIOS, we need to enable it here. 
*/ + if (set_sci_en_on_resume) + acpi_set_register(ACPI_BITREG_SCI_ENABLE, 1); + else + acpi_enable(); + + /* Reprogram control registers and execute _BFS */ + acpi_leave_sleep_state_prep(acpi_state); + + /* ACPI 3.0 specs (P62) says that it's the responsibility + * of the OSPM to clear the status bit [ implying that the + * POWER_BUTTON event should not reach userspace ] + */ + if (ACPI_SUCCESS(status) && (acpi_state == ACPI_STATE_S3)) + acpi_clear_event(ACPI_EVENT_POWER_BUTTON); + + /* + * Disable and clear GPE status before interrupt is enabled. Some GPEs + * (like wakeup GPE) haven't handler, this can avoid such GPE misfire. + * acpi_leave_sleep_state will reenable specific GPEs later + */ + acpi_disable_all_gpes(); + + local_irq_restore(flags); + printk(KERN_DEBUG "Back to C!\n"); + + /* restore processor state */ + if (acpi_state == ACPI_STATE_S3) + acpi_restore_state_mem(); + + return ACPI_SUCCESS(status) ? 0 : -EFAULT; +} + +static int acpi_suspend_state_valid(suspend_state_t pm_state) +{ + u32 acpi_state; + + switch (pm_state) { + case PM_SUSPEND_ON: + case PM_SUSPEND_STANDBY: + case PM_SUSPEND_MEM: + acpi_state = acpi_suspend_states[pm_state]; + + return sleep_states[acpi_state]; + default: + return 0; + } +} + +static struct platform_suspend_ops acpi_suspend_ops = { + .valid = acpi_suspend_state_valid, + .begin = acpi_suspend_begin, + .prepare = acpi_pm_prepare, + .enter = acpi_suspend_enter, + .finish = acpi_pm_finish, + .end = acpi_pm_end, +}; + +/** + * acpi_suspend_begin_old - Set the target system sleep state to the + * state associated with given @pm_state, if supported, and + * execute the _PTS control method. This function is used if the + * pre-ACPI 2.0 suspend ordering has been requested. 
+ */ +static int acpi_suspend_begin_old(suspend_state_t pm_state) +{ + int error = acpi_suspend_begin(pm_state); + + if (!error) + error = __acpi_pm_prepare(); + return error; +} + +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. + */ +static struct platform_suspend_ops acpi_suspend_ops_old = { + .valid = acpi_suspend_state_valid, + .begin = acpi_suspend_begin_old, + .prepare = acpi_pm_disable_gpes, + .enter = acpi_suspend_enter, + .finish = acpi_pm_finish, + .end = acpi_pm_end, + .recover = acpi_pm_finish, +}; + +static int __init init_old_suspend_ordering(const struct dmi_system_id *d) +{ + old_suspend_ordering = true; + return 0; +} + +static int __init init_set_sci_en_on_resume(const struct dmi_system_id *d) +{ + set_sci_en_on_resume = true; + return 0; +} + +static struct dmi_system_id __initdata acpisleep_dmi_table[] = { + { + .callback = init_old_suspend_ordering, + .ident = "Abit KN9 (nForce4 variant)", + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "http://www.abit.com.tw/"), + DMI_MATCH(DMI_BOARD_NAME, "KN9 Series(NF-CK804)"), + }, + }, + { + .callback = init_old_suspend_ordering, + .ident = "HP xw4600 Workstation", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP xw4600 Workstation"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Apple MacBook 1,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "MacBook1,1"), + }, + }, + { + .callback = init_set_sci_en_on_resume, + .ident = "Apple MacMini 1,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Computer, Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Macmini1,1"), + }, + }, + {}, +}; +#endif /* CONFIG_SUSPEND */ + +#ifdef CONFIG_HIBERNATION +static unsigned long s4_hardware_signature; +static struct acpi_table_facs *facs; +static bool nosigcheck; + +void __init acpi_no_s4_hw_signature(void) +{ + nosigcheck = true; +} + +static int 
acpi_hibernation_begin(void) +{ + int error; + + error = s4_no_nvs ? 0 : hibernate_nvs_alloc(); + if (!error) { + acpi_target_sleep_state = ACPI_STATE_S4; + acpi_sleep_tts_switch(acpi_target_sleep_state); + } + + return error; +} + +static int acpi_hibernation_pre_snapshot(void) +{ + int error = acpi_pm_prepare(); + + if (!error) + hibernate_nvs_save(); + + return error; +} + +static int acpi_hibernation_enter(void) +{ + acpi_status status = AE_OK; + unsigned long flags = 0; + + ACPI_FLUSH_CPU_CACHE(); + + local_irq_save(flags); + acpi_enable_wakeup_device(ACPI_STATE_S4); + /* This shouldn't return. If it returns, we have a problem */ + status = acpi_enter_sleep_state(ACPI_STATE_S4); + /* Reprogram control registers and execute _BFS */ + acpi_leave_sleep_state_prep(ACPI_STATE_S4); + local_irq_restore(flags); + + return ACPI_SUCCESS(status) ? 0 : -EFAULT; +} + +static void acpi_hibernation_finish(void) +{ + hibernate_nvs_free(); + acpi_pm_finish(); +} + +static void acpi_hibernation_leave(void) +{ + /* + * If ACPI is not enabled by the BIOS and the boot kernel, we need to + * enable it here. 
+ */ + acpi_enable(); + /* Reprogram control registers and execute _BFS */ + acpi_leave_sleep_state_prep(ACPI_STATE_S4); + /* Check the hardware signature */ + if (facs && s4_hardware_signature != facs->hardware_signature) { + printk(KERN_EMERG "ACPI: Hardware changed while hibernated, " + "cannot resume!\n"); + panic("ACPI S4 hardware signature mismatch"); + } + /* Restore the NVS memory area */ + hibernate_nvs_restore(); +} + +static void acpi_pm_enable_gpes(void) +{ + acpi_enable_all_runtime_gpes(); +} + +static struct platform_hibernation_ops acpi_hibernation_ops = { + .begin = acpi_hibernation_begin, + .end = acpi_pm_end, + .pre_snapshot = acpi_hibernation_pre_snapshot, + .finish = acpi_hibernation_finish, + .prepare = acpi_pm_prepare, + .enter = acpi_hibernation_enter, + .leave = acpi_hibernation_leave, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, +}; + +/** + * acpi_hibernation_begin_old - Set the target system sleep state to + * ACPI_STATE_S4 and execute the _PTS control method. This + * function is used if the pre-ACPI 2.0 suspend ordering has been + * requested. + */ +static int acpi_hibernation_begin_old(void) +{ + int error; + /* + * The _TTS object should always be evaluated before the _PTS object. + * When the old_suspended_ordering is true, the _PTS object is + * evaluated in the acpi_sleep_prepare. + */ + acpi_sleep_tts_switch(ACPI_STATE_S4); + + error = acpi_sleep_prepare(ACPI_STATE_S4); + + if (!error) { + if (!s4_no_nvs) + error = hibernate_nvs_alloc(); + if (!error) + acpi_target_sleep_state = ACPI_STATE_S4; + } + return error; +} + +static int acpi_hibernation_pre_snapshot_old(void) +{ + int error = acpi_pm_disable_gpes(); + + if (!error) + hibernate_nvs_save(); + + return error; +} + +/* + * The following callbacks are used if the pre-ACPI 2.0 suspend ordering has + * been requested. 
+ */ +static struct platform_hibernation_ops acpi_hibernation_ops_old = { + .begin = acpi_hibernation_begin_old, + .end = acpi_pm_end, + .pre_snapshot = acpi_hibernation_pre_snapshot_old, + .finish = acpi_hibernation_finish, + .prepare = acpi_pm_disable_gpes, + .enter = acpi_hibernation_enter, + .leave = acpi_hibernation_leave, + .pre_restore = acpi_pm_disable_gpes, + .restore_cleanup = acpi_pm_enable_gpes, + .recover = acpi_pm_finish, +}; +#endif /* CONFIG_HIBERNATION */ + +int acpi_suspend(u32 acpi_state) +{ + suspend_state_t states[] = { + [1] = PM_SUSPEND_STANDBY, + [3] = PM_SUSPEND_MEM, + [5] = PM_SUSPEND_MAX + }; + + if (acpi_state < 6 && states[acpi_state]) + return pm_suspend(states[acpi_state]); + if (acpi_state == 4) + return hibernate(); + return -EINVAL; +} + +#ifdef CONFIG_PM_SLEEP +/** + * acpi_pm_device_sleep_state - return preferred power state of ACPI device + * in the system sleep state given by %acpi_target_sleep_state + * @dev: device to examine; its driver model wakeup flags control + * whether it should be able to wake up the system + * @d_min_p: used to store the upper limit of allowed states range + * Return value: preferred power state of the device on success, -ENODEV on + * failure (ie. if there's no 'struct acpi_device' for @dev) + * + * Find the lowest power (highest number) ACPI device power state that + * device @dev can be in while the system is in the sleep state represented + * by %acpi_target_sleep_state. If @wake is nonzero, the device should be + * able to wake up the system from this sleep state. If @d_min_p is set, + * the highest power (lowest number) device power state of @dev allowed + * in this system sleep state is stored at the location pointed to by it. + * + * The caller must ensure that @dev is valid before using this function. + * The caller is also responsible for figuring out if the device is + * supposed to be able to wake up the system and passing this information + * via @wake. 
+ */ + +int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(dev); + struct acpi_device *adev; + char acpi_method[] = "_SxD"; + unsigned long long d_min, d_max; + + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { + printk(KERN_DEBUG "ACPI handle has no context!\n"); + return -ENODEV; + } + + acpi_method[2] = '0' + acpi_target_sleep_state; + /* + * If the sleep state is S0, we will return D3, but if the device has + * _S0W, we will use the value from _S0W + */ + d_min = ACPI_STATE_D0; + d_max = ACPI_STATE_D3; + + /* + * If present, _SxD methods return the minimum D-state (highest power + * state) we can use for the corresponding S-states. Otherwise, the + * minimum D-state is D0 (ACPI 3.x). + * + * NOTE: We rely on acpi_evaluate_integer() not clobbering the integer + * provided -- that's our fault recovery, we ignore retval. + */ + if (acpi_target_sleep_state > ACPI_STATE_S0) + acpi_evaluate_integer(handle, acpi_method, NULL, &d_min); + + /* + * If _PRW says we can wake up the system from the target sleep state, + * the D-state returned by _SxD is sufficient for that (we assume a + * wakeup-aware driver if wake is set). Still, if _SxW exists + * (ACPI 3.x), it should return the maximum (lowest power) D-state that + * can wake the system. _S0W may be valid, too. 
+ */ + if (acpi_target_sleep_state == ACPI_STATE_S0 || + (device_may_wakeup(dev) && adev->wakeup.state.enabled && + adev->wakeup.sleep_state <= acpi_target_sleep_state)) { + acpi_status status; + + acpi_method[3] = 'W'; + status = acpi_evaluate_integer(handle, acpi_method, NULL, + &d_max); + if (ACPI_FAILURE(status)) { + d_max = d_min; + } else if (d_max < d_min) { + /* Warn the user of the broken DSDT */ + printk(KERN_WARNING "ACPI: Wrong value from %s\n", + acpi_method); + /* Sanitize it */ + d_min = d_max; + } + } + + if (d_min_p) + *d_min_p = d_min; + return d_max; +} + +/** + * acpi_pm_device_sleep_wake - enable or disable the system wake-up + * capability of given device + * @dev: device to handle + * @enable: 'true' - enable, 'false' - disable the wake-up capability + */ +int acpi_pm_device_sleep_wake(struct device *dev, bool enable) +{ + acpi_handle handle; + struct acpi_device *adev; + + if (!device_may_wakeup(dev)) + return -EINVAL; + + handle = DEVICE_ACPI_HANDLE(dev); + if (!handle || ACPI_FAILURE(acpi_bus_get_device(handle, &adev))) { + printk(KERN_DEBUG "ACPI handle has no context!\n"); + return -ENODEV; + } + + return enable ? 
+ acpi_enable_wakeup_device_power(adev, acpi_target_sleep_state) : + acpi_disable_wakeup_device_power(adev); +} +#endif + +static void acpi_power_off_prepare(void) +{ + /* Prepare to power off the system */ + acpi_sleep_prepare(ACPI_STATE_S5); + acpi_disable_all_gpes(); +} + +static void acpi_power_off(void) +{ + /* acpi_sleep_prepare(ACPI_STATE_S5) should have already been called */ + printk("%s called\n", __func__); + local_irq_disable(); + acpi_enable_wakeup_device(ACPI_STATE_S5); + acpi_enter_sleep_state(ACPI_STATE_S5); +} + +int __init acpi_sleep_init(void) +{ + acpi_status status; + u8 type_a, type_b; +#ifdef CONFIG_SUSPEND + int i = 0; + + dmi_check_system(acpisleep_dmi_table); +#endif + + if (acpi_disabled) + return 0; + + sleep_states[ACPI_STATE_S0] = 1; + printk(KERN_INFO PREFIX "(supports S0"); + +#ifdef CONFIG_SUSPEND + for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) { + status = acpi_get_sleep_type_data(i, &type_a, &type_b); + if (ACPI_SUCCESS(status)) { + sleep_states[i] = 1; + printk(" S%d", i); + } + } + + suspend_set_ops(old_suspend_ordering ? + &acpi_suspend_ops_old : &acpi_suspend_ops); +#endif + +#ifdef CONFIG_HIBERNATION + status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b); + if (ACPI_SUCCESS(status)) { + hibernation_set_ops(old_suspend_ordering ? + &acpi_hibernation_ops_old : &acpi_hibernation_ops); + sleep_states[ACPI_STATE_S4] = 1; + printk(" S4"); + if (!nosigcheck) { + acpi_get_table(ACPI_SIG_FACS, 1, + (struct acpi_table_header **)&facs); + if (facs) + s4_hardware_signature = + facs->hardware_signature; + } + } +#endif + status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b); + if (ACPI_SUCCESS(status)) { + sleep_states[ACPI_STATE_S5] = 1; + printk(" S5"); + pm_power_off_prepare = acpi_power_off_prepare; + pm_power_off = acpi_power_off; + } + printk(")\n"); + /* + * Register the tts_notifier to reboot notifier list so that the _TTS + * object can also be evaluated when the system enters S5. 
+ */ + register_reboot_notifier(&tts_notifier); + return 0; +} -- cgit From d45e0855488032ea62ec5638fb1dcd47367f8ddb Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 15 Jan 2009 15:12:27 -0500 Subject: ACPI PCI hotplug: harden against panic regression ACPI hotplug panic with current git head http://lkml.org/lkml/2009/1/10/136 Rather than reverting the entire commit that causes the crash: e8c331e963c58b83db24b7d0e39e8c07f687dbc6 "PCI hotplug: introduce functions for ACPI slot detection" simply harden against it while the changes to the hotplug code on this particular machine are understood. Signed-off-by: James Bottomley Acked-by: Jesse Barnes Signed-off-by: Len Brown --- drivers/pci/hotplug/acpiphp_glue.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index f09b1010d47..803d9ddd6e7 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -266,6 +266,8 @@ static int detect_ejectable_slots(struct pci_bus *pbus) int found = acpi_pci_detect_ejectable(pbus); if (!found) { acpi_handle bridge_handle = acpi_pci_get_bridge_handle(pbus); + if (!bridge_handle) + return 0; acpi_walk_namespace(ACPI_TYPE_DEVICE, bridge_handle, (u32)1, is_pci_dock_device, (void *)&found, NULL); } -- cgit From 1de9e8e70f5acc441550ca75433563d91b269bbe Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 16 Jan 2009 12:43:00 -0800 Subject: Linux 2.6.29-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index c06e250eca1..207303da397 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 29 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Erotic Pickled Herring # *DOCUMENTATION* -- cgit From 92181f190b649f7ef2b79cbf5c00f26ccc66da2a Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 20 Jan 2009 04:24:26 +0100 Subject: x86: optimise x86's do_page_fault (C entry point for the page fault
path) Impact: cleanup, restructure code to improve assembly gcc isn't _all_ that smart about spilling registers to stack or reusing stack slots, even with branch annotations. do_page_fault contained a lot of functionality, so split unlikely paths into their own functions, and mark them as noinline just to be sure. I consider this actually to be somewhat of a cleanup too: the main function now contains about half the number of lines so the normal path is easier to read, while the error cases are also nicely split away. Also, ensure the order of arguments to functions is always the same: regs, addr, error_code. This can reduce code size a tiny bit, and just looks neater too. And add a couple of branch annotations. Before: do_page_fault: subq $360, %rsp #, After: do_page_fault: subq $56, %rsp #, bloat-o-meter: add/remove: 8/0 grow/shrink: 0/1 up/down: 2222/-1680 (542) function old new delta __bad_area_nosemaphore - 506 +506 no_context - 474 +474 vmalloc_fault - 424 +424 spurious_fault - 358 +358 mm_fault_error - 272 +272 bad_area_access_error - 89 +89 bad_area - 89 +89 bad_area_nosemaphore - 10 +10 do_page_fault 2464 784 -1680 Yes, the total size increases by 542 bytes, due to the extra function calls. But these will very rarely be called (except for vmalloc_fault) in a normal workload. Importantly, do_page_fault is less than 1/3rd its original size, and touches far less stack. Existing gotos and branch hints did move a lot of the infrequently used text out of the fastpath, but that's even further improved after this patch.
Signed-off-by: Nick Piggin Acked-by: Linus Torvalds Signed-off-by: Ingo Molnar --- arch/x86/mm/fault.c | 438 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 256 insertions(+), 182 deletions(-) diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 90dfae511a4..033292dc9e2 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -91,8 +91,8 @@ static inline int notify_page_fault(struct pt_regs *regs) * * Opcode checker based on code by Richard Brunner */ -static int is_prefetch(struct pt_regs *regs, unsigned long addr, - unsigned long error_code) +static int is_prefetch(struct pt_regs *regs, unsigned long error_code, + unsigned long addr) { unsigned char *instr; int scan_more = 1; @@ -409,15 +409,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, } #ifdef CONFIG_X86_64 -static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, - unsigned long error_code) +static noinline void pgtable_bad(struct pt_regs *regs, + unsigned long error_code, unsigned long address) { unsigned long flags = oops_begin(); int sig = SIGKILL; - struct task_struct *tsk; + struct task_struct *tsk = current; printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", - current->comm, address); + tsk->comm, address); dump_pagetable(address); tsk = current; tsk->thread.cr2 = address; @@ -429,6 +429,190 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static noinline void no_context(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; +#ifdef CONFIG_X86_64 + unsigned long flags; + int sig; +#endif + + /* Are we prepared to handle this kernel fault? */ + if (fixup_exception(regs)) + return; + + /* + * X86_32 + * Valid to do another page fault here, because if this fault + * had been triggered by is_prefetch fixup_exception would have + * handled it. + * + * X86_64 + * Hall of shame of CPU/BIOS bugs. 
+ */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata93(regs, address)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ +#ifdef CONFIG_X86_32 + bust_spinlocks(1); +#else + flags = oops_begin(); +#endif + + show_fault_oops(regs, error_code, address); + + tsk->thread.cr2 = address; + tsk->thread.trap_no = 14; + tsk->thread.error_code = error_code; + +#ifdef CONFIG_X86_32 + die("Oops", regs, error_code); + bust_spinlocks(0); + do_exit(SIGKILL); +#else + sig = SIGKILL; + if (__die("Oops", regs, error_code)) + sig = 0; + /* Executive summary in case the body of the oops scrolled away */ + printk(KERN_EMERG "CR2: %016lx\n", address); + oops_end(flags, regs, sig); +#endif +} + +static void __bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct task_struct *tsk = current; + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & PF_USER) { + /* + * It's possible to have interrupts off here. + */ + local_irq_enable(); + + /* + * Valid to do another page fault here because this one came + * from user space. + */ + if (is_prefetch(regs, error_code, address)) + return; + + if (is_errata100(regs, address)) + return; + + if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && + printk_ratelimit()) { + printk( + "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", + task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, + (void *) regs->ip, (void *) regs->sp, error_code); + print_vma_addr(" in ", regs->ip); + printk("\n"); + } + + tsk->thread.cr2 = address; + /* Kernel addresses are always protection faults */ + tsk->thread.error_code = error_code | (address >= TASK_SIZE); + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGSEGV, si_code, address, tsk); + return; + } + + if (is_f00f_bug(regs, address)) + return; + + no_context(regs, error_code, address); +} + +static noinline void bad_area_nosemaphore(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); +} + +static void __bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address, + int si_code) +{ + struct mm_struct *mm = current->mm; + + /* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ + up_read(&mm->mmap_sem); + + __bad_area_nosemaphore(regs, error_code, address, si_code); +} + +static noinline void bad_area(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_MAPERR); +} + +static noinline void bad_area_access_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + __bad_area(regs, error_code, address, SEGV_ACCERR); +} + +/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ +static void out_of_memory(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). 
+ */ + up_read(&current->mm->mmap_sem); + pagefault_out_of_memory(); +} + +static void do_sigbus(struct pt_regs *regs, + unsigned long error_code, unsigned long address) +{ + struct task_struct *tsk = current; + struct mm_struct *mm = tsk->mm; + + up_read(&mm->mmap_sem); + + /* Kernel mode? Handle exceptions or die */ + if (!(error_code & PF_USER)) + no_context(regs, error_code, address); +#ifdef CONFIG_X86_32 + /* User space => ok to do another page fault */ + if (is_prefetch(regs, error_code, address)) + return; +#endif + tsk->thread.cr2 = address; + tsk->thread.error_code = error_code; + tsk->thread.trap_no = 14; + force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +} + +static noinline void mm_fault_error(struct pt_regs *regs, + unsigned long error_code, unsigned long address, unsigned int fault) +{ + if (fault & VM_FAULT_OOM) + out_of_memory(regs, error_code, address); + else if (fault & VM_FAULT_SIGBUS) + do_sigbus(regs, error_code, address); + else + BUG(); +} + static int spurious_fault_check(unsigned long error_code, pte_t *pte) { if ((error_code & PF_WRITE) && !pte_write(*pte)) @@ -448,8 +632,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte) * There are no security implications to leaving a stale TLB when * increasing the permissions on a page. */ -static int spurious_fault(unsigned long address, - unsigned long error_code) +static noinline int spurious_fault(unsigned long error_code, + unsigned long address) { pgd_t *pgd; pud_t *pud; @@ -494,7 +678,7 @@ static int spurious_fault(unsigned long address, * * This assumes no large pages in there.
*/ -static int vmalloc_fault(unsigned long address) +static noinline int vmalloc_fault(unsigned long address) { #ifdef CONFIG_X86_32 unsigned long pgd_paddr; @@ -573,6 +757,25 @@ static int vmalloc_fault(unsigned long address) int show_unhandled_signals = 1; +static inline int access_error(unsigned long error_code, int write, + struct vm_area_struct *vma) +{ + if (write) { + /* write, present and write, not present */ + if (unlikely(!(vma->vm_flags & VM_WRITE))) + return 1; + } else if (unlikely(error_code & PF_PROT)) { + /* read, present */ + return 1; + } else { + /* read, not present */ + if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) + return 1; + } + + return 0; +} + /* * This routine handles page faults. It determines the address, * and the problem, and then passes it off to one of the appropriate @@ -583,16 +786,12 @@ asmlinkage #endif void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) { + unsigned long address; struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - unsigned long address; - int write, si_code; + int write; int fault; -#ifdef CONFIG_X86_64 - unsigned long flags; - int sig; -#endif tsk = current; mm = tsk->mm; @@ -601,9 +800,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) /* get the address */ address = read_cr2(); - si_code = SEGV_MAPERR; - - if (notify_page_fault(regs)) + if (unlikely(notify_page_fault(regs))) return; if (unlikely(kmmio_fault(regs, address))) return; @@ -631,17 +828,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) return; /* Can handle a stale RO->RW TLB */ - if (spurious_fault(address, error_code)) + if (spurious_fault(error_code, address)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch * fault we could otherwise deadlock. 
*/ - goto bad_area_nosemaphore; + bad_area_nosemaphore(regs, error_code, address); + return; } - /* * It's safe to allow irq's after cr2 has been saved and the * vmalloc fault has been handled. @@ -657,15 +854,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) #ifdef CONFIG_X86_64 if (unlikely(error_code & PF_RSVD)) - pgtable_bad(address, regs, error_code); + pgtable_bad(regs, error_code, address); #endif /* * If we're in an interrupt, have no user context or are running in an * atomic region then we must not take the fault. */ - if (unlikely(in_atomic() || !mm)) - goto bad_area_nosemaphore; + if (unlikely(in_atomic() || !mm)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } /* * When running in the kernel we expect faults to occur only to @@ -683,20 +882,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. */ - if (!down_read_trylock(&mm->mmap_sem)) { + if (unlikely(!down_read_trylock(&mm->mmap_sem))) { if ((error_code & PF_USER) == 0 && - !search_exception_tables(regs->ip)) - goto bad_area_nosemaphore; + !search_exception_tables(regs->ip)) { + bad_area_nosemaphore(regs, error_code, address); + return; + } down_read(&mm->mmap_sem); } vma = find_vma(mm, address); - if (!vma) - goto bad_area; - if (vma->vm_start <= address) + if (unlikely(!vma)) { + bad_area(regs, error_code, address); + return; + } + if (likely(vma->vm_start <= address)) goto good_area; - if (!(vma->vm_flags & VM_GROWSDOWN)) - goto bad_area; + if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { + bad_area(regs, error_code, address); + return; + } if (error_code & PF_USER) { /* * Accessing the stack below %sp is always a bug. @@ -704,31 +909,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) * and pusha to work. ("enter $65535,$31" pushes * 32 pointers and then decrements %sp by 65535.) 
*/ - if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) - goto bad_area; + if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { + bad_area(regs, error_code, address); + return; + } } - if (expand_stack(vma, address)) - goto bad_area; -/* - * Ok, we have a good vm_area for this memory access, so - * we can handle it.. - */ + if (unlikely(expand_stack(vma, address))) { + bad_area(regs, error_code, address); + return; + } + + /* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ good_area: - si_code = SEGV_ACCERR; - write = 0; - switch (error_code & (PF_PROT|PF_WRITE)) { - default: /* 3: write, present */ - /* fall through */ - case PF_WRITE: /* write, not present */ - if (!(vma->vm_flags & VM_WRITE)) - goto bad_area; - write++; - break; - case PF_PROT: /* read, present */ - goto bad_area; - case 0: /* read, not present */ - if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) - goto bad_area; + write = error_code & PF_WRITE; + if (unlikely(access_error(error_code, write, vma))) { + bad_area_access_error(regs, error_code, address); + return; } /* @@ -738,11 +937,8 @@ good_area: */ fault = handle_mm_fault(mm, vma, address, write); if (unlikely(fault & VM_FAULT_ERROR)) { - if (fault & VM_FAULT_OOM) - goto out_of_memory; - else if (fault & VM_FAULT_SIGBUS) - goto do_sigbus; - BUG(); + mm_fault_error(regs, error_code, address, fault); + return; } if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; @@ -760,128 +956,6 @@ good_area: } #endif up_read(&mm->mmap_sem); - return; - -/* - * Something tried to access memory that isn't in our memory map.. - * Fix it, but check if it's kernel or user first.. - */ -bad_area: - up_read(&mm->mmap_sem); - -bad_area_nosemaphore: - /* User mode accesses just cause a SIGSEGV */ - if (error_code & PF_USER) { - /* - * It's possible to have interrupts off here. - */ - local_irq_enable(); - - /* - * Valid to do another page fault here because this one came - * from user space. 
- */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata100(regs, address)) - return; - - if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && - printk_ratelimit()) { - printk( - "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, - (void *) regs->ip, (void *) regs->sp, error_code); - print_vma_addr(" in ", regs->ip); - printk("\n"); - } - - tsk->thread.cr2 = address; - /* Kernel addresses are always protection faults */ - tsk->thread.error_code = error_code | (address >= TASK_SIZE); - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGSEGV, si_code, address, tsk); - return; - } - - if (is_f00f_bug(regs, address)) - return; - -no_context: - /* Are we prepared to handle this kernel fault? */ - if (fixup_exception(regs)) - return; - - /* - * X86_32 - * Valid to do another page fault here, because if this fault - * had been triggered by is_prefetch fixup_exception would have - * handled it. - * - * X86_64 - * Hall of shame of CPU/BIOS bugs. - */ - if (is_prefetch(regs, address, error_code)) - return; - - if (is_errata93(regs, address)) - return; - -/* - * Oops. The kernel tried to access some bad page. We'll have to - * terminate things with extreme prejudice. 
- */ -#ifdef CONFIG_X86_32 - bust_spinlocks(1); -#else - flags = oops_begin(); -#endif - - show_fault_oops(regs, error_code, address); - - tsk->thread.cr2 = address; - tsk->thread.trap_no = 14; - tsk->thread.error_code = error_code; - -#ifdef CONFIG_X86_32 - die("Oops", regs, error_code); - bust_spinlocks(0); - do_exit(SIGKILL); -#else - sig = SIGKILL; - if (__die("Oops", regs, error_code)) - sig = 0; - /* Executive summary in case the body of the oops scrolled away */ - printk(KERN_EMERG "CR2: %016lx\n", address); - oops_end(flags, regs, sig); -#endif - -out_of_memory: - /* - * We ran out of memory, call the OOM killer, and return the userspace - * (which will retry the fault, or kill us if we got oom-killed). - */ - up_read(&mm->mmap_sem); - pagefault_out_of_memory(); - return; - -do_sigbus: - up_read(&mm->mmap_sem); - - /* Kernel mode? Handle exceptions or die */ - if (!(error_code & PF_USER)) - goto no_context; -#ifdef CONFIG_X86_32 - /* User space => ok to do another page fault */ - if (is_prefetch(regs, address, error_code)) - return; -#endif - tsk->thread.cr2 = address; - tsk->thread.error_code = error_code; - tsk->thread.trap_no = 14; - force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); } DEFINE_SPINLOCK(pgd_lock); -- cgit