diff options
| author | D. Johnson <fenris02@fedoraproject.org> | 2013-08-16 12:05:11 -0500 |
|---|---|---|
| committer | D. Johnson <fenris02@fedoraproject.org> | 2013-08-16 12:05:11 -0500 |
| commit | be3607bef92d35711a78a3746e9739a67ccac240 (patch) | |
| tree | 5127be30dbd0778e31562bf873e312aa2a014c20 | |
| parent | a4453f25e148bd4a058bbfc744f0d4a22b83fc06 (diff) | |
| download | cleanup-be3607bef92d35711a78a3746e9739a67ccac240.tar.gz cleanup-be3607bef92d35711a78a3746e9739a67ccac240.tar.xz cleanup-be3607bef92d35711a78a3746e9739a67ccac240.zip | |
Add group hack for f19+
| -rwxr-xr-x | debloat | 2030 | ||||
| -rwxr-xr-x | debloat.sh | 127 | ||||
| -rwxr-xr-x | rpm-verify.sh | 10 | ||||
| -rw-r--r-- | simple_qos.sh | 255 |
4 files changed, 2422 insertions, 0 deletions
#!/usr/bin/lua

-- Use of various qdiscs for ethernet and wireless

-- This script expects to be run in /etc/network/if-pre-up.d To run it
-- manually, do a IFACE=yournetworkcard ./debloat

-- For NATTED interfaces, use a NAT=y for a better filter
-- To select QFQ use QMODEL=qfq.

-- Some options currently require a new version of tc. Build a version
-- of iproute2 and stick it somewhere and change the TC variable to
-- suit.

-- These are the possible parameters that can be changed via
-- environment variables.

params = { "MDISC", "BIGDISC", "NORMDISC", "BINS", "MAX_HWQ_BYTES",
           "QMODEL", "FORCE_SPEED", "FORCE_RING", "QDEBUG", "VERBOSE",
           "UPLINK", "DOWNLINK", "IFACE", "SPEED", "DEPTH", "DEBLOATLOG",
           "NAT", "NAT64", "CLAMP_MSS", "SYN_FLOOD", "MTU", "INGRESS",
           "TC", "TCARG", "ETHTOOL", "INSMOD", "LSMOD", "IPTABLES",
           "IP6TABLES", "PINGOPT", "FLOWS", "TSO", "BURST", "IPV6",
           "OVERHEAD", "LINKLAYER", "MPU", "PPPOE", "ADSL",
           "NOPRIOHOSTSRC","NOPRIOHOSTDST","NOPRIOPORTSRC",
           "NOPRIOPORTDST", "EST_MIN", "EST_MAX", "HEADDROP", "ECNMASK",
           "qlen_vo", "qlen_vi", "qlen_be", "qlen_bk", "CODEL_LL_QUANTUM" }

-- Useful defaults

env = { ["TC"] = "/sbin/tc",
        ["TCARG"] = "-b",
        ["INSMOD"] = "/sbin/modprobe",
        ["ETHTOOL"] = "/sbin/ethtool",
        ["LSMOD"] = "/sbin/lsmod",
        ["IPTABLES"] = "/sbin/iptables",
        ["IP6TABLES"] = "/sbin/ip6tables",
        ["MDISC"] = "codel",
        ["BIGDISC"] = "codel",
        ["NORMDISC"] = "codel",
        ["BINS"] = 2048,
        ["DEPTH"] = 24,
        ["QMODEL"] = "fq_codel_ll",
        ["MAX_HWQ_BYTES"] = 3000,
        ["ECNMASK"] = 0xfc,
        ["EST_MIN"] = 1,
        ["EST_MAX"] = 4,
        ["IPV6"] = true,
        ["LINKLAYER"] = "ethernet",
        ["DEBLOATLOG"] = "/dev/null",
        ["qlen_vo"] = 2,
        ["qlen_vi"] = 4,
        ["qlen_be"] = 12,
        ["qlen_bk"] = 12,
        ["MTU"] = 1500,
        ["CODEL_LL_QUANTUM"] = 1000
     }

wireless_debug = "/sys/kernel/debug/ieee80211/"
qlens = { "qlen_vo", "qlen_vi", "qlen_bk", "qlen_be" }

-- various shortcuts for commonly used functions

local sf=string.format
local exec=os.execute
local popen=io.popen
local open=io.open

-- FIXME, override above to redirect stderr

VO=0x10; VI=0x20; BE=0x30; BK=0x40
local WQUEUES = { BE, VO, VI, BK }

-- Print the long help text, then the message s, and exit with status -1.
-- The help text is kept in a local so it no longer leaks a global "o".

local function usage(s) local o=[[

The debloat tool aims for minimal latency (particularly under load) on
the network, for hosts, servers, wireless devices, and routers.

There are various forms of traffic shapers and tools in here because
this is an unsolved problem! Most of the known techniques are in here,
however, and the results can be quite remarkable. At tested rates of
100Mbit and 4Mbit, we see interstream latencies drop by over two
orders of magnitude.

This script expects to be run in /etc/network/if-pre-up.d

To run it manually, do a:

IFACE=yournetworkcard ./this_script

For NATTED interfaces, use a NAT=y for a better filter.

There are many environment variables and at some point will be a conf
file. The one of greatest importance is "QMODEL" to which I keep
adding various models for various circumstances. See the end of this
file for more details.

This script can be run on both debian and openwrt.

Usage of QFQ and the advanced SFQ and SFQRED options currently
requires a patched version of iproute2 and a Linux 3.3 kernel.

Build a version and stick it somewhere and change TC to suit.

Also, if you are interested in seeing the rules being generated,
rather than reconfiguring your system

export QDEBUG=1

is helpful.

* Some general overall design notes:

This started out life as a shell script to exercise qfq,
Now it does a lot more than that and is getting crufty.

FQ_CODEL is now the default. SFQ has been improved significantly
in Linux 3.3 (eliminating a head of line problem), and in this case
no new TC utility is required. Also a bug in red was fixed, and no
new tc utility is required there either. So if you were using either
or both of these qdiscs, you should automagically see your life
improve...

QFQ is too buggy prior to 3.3 to use.

More advanced SFQ options and REDSFQ and QFQ all require a patched
version of TC. Also, most builds for the linux kernel do not
enable QFQ by default. QFQ and SFQ are behaving competitively now
in most circumstances, however.

* Byte Queue Limits is supposed to have a rate limiter that works.

It is not very effective at less than 100Mbit. I get ~32k peak there
and with GSO on, at 100Mbit, I have seen latency spikes of up to 70ms.

 (Not recently tested, however)

A per queue limit of 2 large packets appears to be the best
compromise at 100Mbit and below. So typically I hammer down BQL to
3k at < 100Mbit, and turn GSO/TSO off, and as a result see
ping against load latencies in the 1 to 2ms range, which is about
what you would expect. I have tried 1500 bytes, which limited the top
end performance to about 84Mbit. At 10Mbit, 1514 works on most OSes.

For comparison, you will see PFIFO_FAST doing 130+ms, pre BQL, no
SFQ at 100Mbit.

* A BQL enabled ethernet device driver is helpful

But there is currently no good way to detect if you have one at run
time. 10 of the most major drivers have been convered to BQL, more
remain.

* Wireless still has problems

This stuff helps on wireless stations, desktops, etc, and on P2P
wireless links.

** caveat 1

There remains so much device buffering and retries below the qdisc
layer as to defeat both FQ and and AQM to a large extent. Also packets
tend to be held 'forever' (ping rtts of over 10 seconds have been
observed)

A time in queue optimization at the qdisc layer for the latter problem
has been proposed, but not implemented, and much further work on the
wireless driver portion of the stack remains to be designed and agreed
upon.

BQL has not (and cannot, to a large extent) be implemented on the
wireless portion of the stack as it currently stands.

** caveat 2

There is not a particularly good way to apply much of this to the
wireless interface on an AP as yet. FQ messes with wireless-n packet
aggregation. That said, under home use with a limited number of user,
SFQ+RED does seem to work pretty good.

* Some QFQ related notes:
** QFQ can handle up to 32k bins

Whether you are willing to wait for them to be generated is a better
question. How this interacts with bittorrent etc is also a good
question. 512 is 4x as many bins as the old SFQ implementation.

I have tested as many as 2048 bins, problems ensue with kernel
memory allocation at various levels higher than that.

The 'bin creation' problem is why this code uses tc in batch mode. It
used to take minutes to create the bins. Now, a split second. (there
was also a patch that helped this in 3.3)

** Various sub-qdiscs in QFQ

I have tried pfifo_drop_head, SFB, and RED here. All had bugs until
3.3. And linux RED & SFB, being byte oriented, was often not good.
pfifo_drop_head generates interesting results.

The very new combination of REDSFQ which compensates for both bytes
and packets was very interesting, as it combines everything we have
learned in the past year into one single qdisc which can be brought up
as a shaper in three lines of code.

FQ_Codel is better.

In other news:

I have not tried the new 'adaptive red' implementation as a stand
alone qdisc, nor revisited SFB in light about what I now know about
GSO behavior.

I would like to try QFQ and SFQ in combination to attempt to defeat
the bittorrent problem at some point.

** Calculating a sane per-queue packet limit is an issue, too.

iw10 requires a minimum of 10, and more likely 12 (fin, close) so...
In places we arbitrarily double that, and wave hands. I almost never
see packet drop with 24, which is far, far better than 1000. Might
need to be larger on gigE+. Might be wrong headed entirely.

** Multicast

We try to maltreat multicast especially in the QFQ implementation.

When handed to a load balancing filter based on IPs, multicast
addresses are all over the map. It would be trivial to do a DOS with
this multi-bin setup. So we toss all multicast into a single bin
whenever possible. This is suboptimal, also. It would be good
to get multicast into the VO queue on wireless but bugs exist.

Multicast concerns me also when using SFQ on general purpose ethernet.

** Default Bins

You can do tricks with the DEFAULTB concept, creating a filter to
optimize for ping, for example, which makes tests reproducable. (this
is done for wshaper and QFQ) Another example would be to set aside
bins for voip or dns, etc. Still, it is saner to just let the filter
do all the work of finding a decent bin.

The only sane purpose for DEFAULTB at the moment is to have a safe
place to put QFQ packets until all the filters and bins are setup.

* Other important debloat options

There are many environment variables that can be set. Most
notably - the QMODEL var has various forms of AQM/FQ/shaper available.

Available QMODELS are qfq, sfq, sfqred, efq and various combinations
thereof, as well as a hard coded 4mbit htb_sfq_red model, and emulations
of the original wondershaper and a mildly improved one. See the
tail end of the code for what is available.

Most work on either ethernet or wireless and try to deal with
the problems of each.

Usage of QFQ and the advanced SFQ options currently requires a new
version of iproute2 and a Linux 3.3 kernel and some patches.

A byte Queue limit enabled device driver is required for ethernet,
except for when the HTB rate limiter is used.

In all cases a Linux 3.3 or later kernel is required for best results.

]]

print(o)
-- print("Available Shaper Models Are:")
-- for i,v in pairs (ECALLBACKS) do
--    print(i)
-- end
print(s)
os.exit(-1)
end

-- Default no-op hooks; both accept any arguments and return nil.

pingopt = function(...) return nil end
warn = function(...) return nil end

-- Return true when the file can be opened for reading, false otherwise.

function file_exists(name)
   local f=open(name,"r")
   if f == nil then return false end
   f:close()
   return true
end

-- OpenWrt is detected by the presence of /etc/uci-defaults.

local function is_openwrt()
   return file_exists("/etc/uci-defaults")
end

-- Adjust the default tool paths when running on OpenWrt. Environment
-- variables are applied afterwards and still take precedence.

if is_openwrt() then
   env.INSMOD = "/sbin/insmod"
   env.ETHTOOL = "/usr/sbin/ethtool"
   env.TC = "/usr/sbin/tc"
end

-- pull params from conf file (not implemented yet)

local function getconf()
end

-- getenv pulls in everything as strings,
-- so do the conversion here.
--
-- Returns nil when the variable is unset, a boolean for "true"/"false",
-- a number when the value starts with a digit and converts cleanly
-- (tonumber also accepts decimals and 0x hex), and the raw string
-- otherwise. Values that merely start with a digit, such as
-- "192.168.1.1" or "10mbit", are kept as strings; previously they came
-- back as nil and the setting was silently ignored.

local function fromenv(v)
   local s = os.getenv(v)
   if s == nil then return nil end
   if s == "true" then return true end
   if s == "false" then return false end
   if string.match(s,"^%d+") ~= nil then
      local n = tonumber(s)
      if n ~= nil then return n end
   end
   return s
end

-- From the possible parameters in t, override o.
-- Only variables that are actually set in the environment replace the
-- existing entries of o; o is modified in place and returned.

local function getenvs(t,o)
   for _,name in pairs(t) do
      local value = fromenv(name)
      if value ~= nil then o[name] = value end
   end
   return o
end

env = getenvs(params, env)

if (env["IFACE"] == nil) then
   usage("Error: The IFACE environment variable must be set")
end

IFACE=env.IFACE
QMODEL=env.QMODEL
BINS=env.BINS
MULTICAST=BINS+1
DEFAULTB=BINS+2

PREREQS = { "sch_qfq", "sch_codel", "sch_fq_codel", "cls_u32", "cls_flow",
            "sch_sfq", "sch_red", "sch_htb", "cls_fw", "sch_efq_codel",
            "sch_ns2_codel", "sch_nfq_codel" }

-- we can get more complex later

PREREQS2 = {
   ["qfq"] = { "sch_qfq", "cls_u32", "cls_flow" },
   ["sfq"] = { "sch_sfq", "cls_u32", "cls_flow" },
   ["red"] = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" },
   ["ared"] = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" }
}

-- FIXME: Merge multiple tables into one table on values in first
-- return table
-- Build a set (value -> true) from the values of every table passed in.
-- Accepts either several lists, merge(a, b, c), or a single table of
-- lists, merge({a, b, c}); nested tables are flattened. The previous
-- version called pairs(...), which only looks at its first argument and
-- raised an error when handed more than one flat list.

function merge(...)
   local set = { }
   local function add(list)
      for _,value in pairs(list) do
         if type(value) == "table" then
            add(value)
         else
            set[value] = true
         end
      end
   end
   add({...})
   return set
end

-- slurp a file: return its whole contents, or nil if it cannot be opened

function slurpf(file)
   local f = open(file,"r")
   if f == nil then return nil end
   local s = f:read("*all")
   f:close()
   return s
end

-- spew output into a command; returns the result of the write, or nil
-- if the pipe could not be opened

function spewc(command,s)
   local f = popen(command,"w")
   if f == nil then return nil end
   local v = f:write(s)
   f:close()
   return v
end

-- spew output into a file; returns the result of the write, or nil if
-- the file could not be opened for writing

function spewf(file,s)
   local f = open(file,"w")
   if f == nil then return nil end
   local v = f:write(s)
   f:close()
   return v
end

-- slurp a file into a table of lines (nil if the file is unreadable)

function tslurpf(file)
   local s = slurpf(file)
   if s ~= nil then return s:split("\n") end
   return nil
end

-- return the output of a command as a big string

function slurpc(command)
   local f = popen(command,"r")
   if f == nil then return nil end
   local s = f:read("*all")
   f:close()
   return s
end

-- return the output of a command as a table of lines

function tslurpc(command)
   local s = slurpc(command)
   if s ~= nil then return s:split("\n") end
   return nil
end

-- Some utility functions

-- can't depend on 'wlan or eth' patterns, so try sysfs
-- FIXME: This needs to be made smarter and detect other forms
-- of tunnel.

-- Classify an interface by name, falling back to a sysfs probe for
-- wireless; anything unrecognised is reported as 'ethernet'.

function interface_type(iface)
   if iface == 'lo' then return('localhost') end
   if iface:sub(1,3) == 'ifb' then return('ifb') end
   if iface:find('%.') ~= nil then return('vlan') end
   if iface:sub(1,3) == 'gre' then return('tunnel') end
   if iface:sub(1,2) == 'br' then return('bridge') end
   if file_exists(sf("/sys/class/net/%s/phy80211/name",iface)) then return ('wireless') end
   return ('ethernet')
end

-- Run ethtool with formatted arguments and return a read pipe on its
-- output; stderr is appended to env.DEBLOATLOG.

local function ethtool_popen(...)
   return popen(sf("%s %s 2>> %s",env.ETHTOOL,sf(...),env.DEBLOATLOG),"r")
end

-- Run ethtool with formatted arguments, discarding its output handle;
-- stderr is appended to env.DEBLOATLOG.

local function ethtool(...)
   exec(sf("%s %s 2>> %s",env.ETHTOOL,sf(...),env.DEBLOATLOG))
end

-- lua doesn't have a split function. Grr.
-- Split on any of the characters in sep (default ":"); empty fields are
-- dropped. sep is placed verbatim inside a [^...] pattern set.

function string:split(sep)
   local fields = { }
   local pattern = string.format("([^%s]+)", sep or ":")
   for piece in self:gmatch(pattern) do
      fields[#fields+1] = piece
   end
   return fields
end

-- return the modules already installed (first column of lsmod output,
-- header line excluded); an unreadable lsmod yields an empty list

local function lsmod()
   local loaded = { }
   for _,line in pairs(tslurpc(env.LSMOD) or { }) do
      local name = line:split(" ")[1]
      if name ~= "Module" then
         table.insert(loaded,name)
      end
   end
   return loaded
end

-- take a table of modules to insert

local function insmod(modules)
   for _,module in pairs(modules) do
      exec(sf("%s %s",env.INSMOD,module))
   end
   return true
end

-- there must be a more lua-specific way for this
-- Linear search: true when s is one of the values of t.

function exists(t,s)
   for _,v in pairs(t) do
      if v == s then return true end
   end
   return false
end

-- Return pre-reqs not installed

function prereq_check(prereqs)
   local loaded = lsmod()   -- was an accidental global "s"
   local missing = { }
   for _,module in pairs(prereqs) do
      if exists(loaded,module) == false then
         table.insert(missing,module)
      end
   end
   return missing
end

-- install pre-reqs if not installed

function kernel_prereqs(prereqs)
   return insmod(prereq_check(prereqs))
end

-- Round to the nearest integer; exact halves round down (round(1.5) == 1).

function round(v)
   return math.ceil(v-.5)
end

function kleinrock(bandwidth, delay, flows)
   return round(math.sqrt(flows)*bandwidth*delay)
end

-- Clamp v to the closed range [min,max]; the limits are swapped when
-- they are given in the wrong order.

function bound(v,min,max)
   if max < min then min, max = max, min end
   if v > max then return max end
   if v < min then return min end
   return v
end

function rbound(v,min,max)
   return round(bound(v,min,max))
end

local function lowrate(rate)
   return rate < 15000
end

-- Don't like these side effects
-- Adjusts env in place for PPPoE / ADSL / low uplink rates.

local function htb_est()
   if env.PPPOE then env.OVERHEAD=40 end
   if env.ADSL then env.LINKLAYER="adsl" end
   if lowrate(env.UPLINK) then env.R2Q=1 end
end

-- ADSL overhead calculation for htb
-- Returns a string of space separated KEY=value settings. The overhead
-- value is read from env.OVERHEAD; the old code concatenated an
-- undefined global OVERHEAD and raised an error whenever it was set.

local function overhead(rate)
   local s = " "
   if env.OVERHEAD then s = s .. "OVERHEAD=" .. env.OVERHEAD .. " " end
   if env.LINKLAYER then s = s .. "LINKLAYER=" .. env.LINKLAYER .. " " end
   if lowrate(rate) then s = s .. "R2Q=1 " end
   return(s)
end

-- Attempt at finding useful values for sfqred
-- FIXME: re-read ared paper

-- Yes, you want a depth=10 (or 15) limit, to cope with the increase
-- of CWND done by Google.

-- Best thing would be to use SFQRED and headdrop, so that there is no
-- assumption on packet lengths.

-- tc qdisc add ... sfq headdrop limit 200 depth 15 redflowlimit 50000
-- min 5000 max 10000 probability 0.15 ecn

-- Another often hidden assumption is that RTTs are 100ms

target = 50

-- The problem with byte oriented red is that it will never
-- kick in or mark acks.

-- Returns limit, rlimit, depth, pkt_per_ms_worst for an uplink rate.
-- NOTE(review): bytes_per_ms, pkt_per_ms_worst and pkt_per_ms_best are
-- assigned as globals; kept that way in case later code reads them.

local function redflowlimit(up,down)
   local OVERHEAD = env.OVERHEAD or 0
   bytes_per_ms = up / 12.5
   pkt_per_ms_worst = bytes_per_ms/(env.MTU + OVERHEAD)
   pkt_per_ms_best = bytes_per_ms/(64 + OVERHEAD)
   local limit = round(pkt_per_ms_best * target)
   local rlimit = rbound(target * bytes_per_ms,1500,15000)
   local depth = 10
   return limit,rlimit,depth,pkt_per_ms_worst
end

--print(redflowlimit(4000,20000))
--print(redflowlimit(2000,20000))
--print(redflowlimit(400,2000))

-- Choke is packet oriented at the UI, but bytes underlie it...
-- Same arithmetic as the sfqred estimate, used for choke.
-- Returns limit, rlimit, depth, pkt_per_ms_worst. The three *_per_ms
-- values are written to globals, exactly as before.

local function chokelimit(up,down)
   local per_packet_overhead = 0
   if env.OVERHEAD then per_packet_overhead = env.OVERHEAD end

   bytes_per_ms = up / 12.5
   pkt_per_ms_worst = bytes_per_ms/(env.MTU + per_packet_overhead)
   pkt_per_ms_best = bytes_per_ms/(64 + per_packet_overhead)

   local packet_limit = round(pkt_per_ms_best * target)
   local byte_limit = rbound(target * bytes_per_ms,1500,15000)
   return packet_limit,byte_limit,10,pkt_per_ms_worst
end

-- Some htb info that is probably hopelessly out of date

-- Counting packets with quantum can be strange. If we have a low rate
-- class (rate = 5kbit), default quantum = 5000 / 10 = 500 bytes. But
-- most packets are more than 500 bytes. Htb version 1 and 2 uses DRR,
-- so a packet larger than 1000 bytes will be sent and it will
-- remember how much it sent and wait until the packet is paid back
-- before another packet is sent. So if you send 1000 byte, next time
-- the class is polled, you will not be allowed to send.

-- Htb3 uses the WRR scheduler. When a packet with size > quantum is
-- sent, it will be sent and an error that the quantum is too small
-- will be logged. But there is no pay back. The WRR scheduler is
-- faster than the DRR scheduler. So make sure quantum is bigger than
-- the default packet size. For 15 kbyte/s and default r2q, quantum is
-- 1500 and this is exactly the maximum packet size. If you want to
-- tune htb for rates smaller than 15 kbyte/s, you can manually set
-- the r2q and/or quantum.

-- FIXME:
-- We want to capture the characteristics of
-- the interface in a table.
-- So we need to parse the output of ethtool better
-- ["tx-ring"] = X
-- ["speed"] = X
-- etc
-- return a hash of the properties of the interface

-- terrific, this is hard to parse.
-- ethtool -g eth0

-- Ring parameters for eth0:
-- Pre-set maximums:
-- RX: 4096
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 4096
-- Current hardware settings:
-- RX: 256
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 64

-- ethtool -g wlan0
-- Ring parameters for wlan0:
-- Pre-set maximums:
-- RX: 0
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 0
-- Current hardware settings:
-- RX: 0
-- RX Mini: 0
-- RX Jumbo: 0
-- TX: 0

-- -k is easier
-- ethtool -k eth0
-- Offload parameters for eth0:
-- rx-checksumming: on
-- tx-checksumming: on
-- scatter-gather: on
-- tcp-segmentation-offload: off
-- udp-fragmentation-offload: off
-- generic-segmentation-offload: off
-- generic-receive-offload: on
-- large-receive-offload: off
-- rx-vlan-offload: on
-- tx-vlan-offload: on
-- ntuple-filters: off
-- receive-hashing: off

-- Strip leading and trailing whitespace.

function string:trim ()
   return (string.gsub(self, "^%s*(.-)%s*$", "%1"))
end

-- Parse "ethtool -k" into a table of feature name -> "on"/"off" text.
-- Returns nil when ethtool reports "no offload info available", and an
-- empty table when the command produced no output.
--
-- The old code only accepted keys made of more than one word, which
-- rejected every line of the sample output above (all keys are single
-- words) and so always produced an empty table. A line is now recorded
-- whenever it has a "key: value" shape and is not the "Offload ..."
-- header.

function offloads(iface)
   local t = { }
   local lines = tslurpc(sf("%s -k %s",env.ETHTOOL,iface))
   if lines == nil then return t end

   for _,line in ipairs(lines) do
      if line == "no offload info available" then
         return nil
      end
   end

   for _,line in ipairs(lines) do
      local h = line:split(":")
      if #h > 1 and h[1]:split(" ")[1] ~= "Offload" then
         t[h[1]:trim()] = h[2]:trim()
         -- FIXME: should probably change 'off' and 'on' to false and true
      end
   end
   return t
end

-- test the offloads problem
-- we have a wide range of possible inputs to test against as yet

function test_offloads(iface)
   local o = offloads(iface)
   if o ~= nil then
      for i,v in pairs(o) do
         print(sf("%s %s",i,v))
      end
   end
end

-- test_offloads("eth1")

-- FIXME - could use a little more thought on creating the
-- hash

-- Parse "ethtool -g" into a table. Entries before the "Current hardware
-- settings" header are stored as max_<name>, entries after it as
-- cur_<name> (e.g. max_TX, cur_TX). Values stay strings, as before.
-- Missing command output and lines without a "name: value" shape are
-- skipped; both used to raise an error.

function ring_params(iface)
   local t = { }
   local prefix = 'max_'
   for _,line in ipairs(tslurpc(sf("%s -g %s",env.ETHTOOL,iface)) or { }) do
      local h = line:split(":")
      local first_word = h[1] and h[1]:split(" ")[1]
      if first_word == "Ring" or first_word == "Pre-set" then
         -- section headers carry no value
      elseif first_word == "Current" then
         prefix = 'cur_'
      elseif h[1] ~= nil and h[2] ~= nil then
         t[prefix .. h[1]:trim()] = h[2]:trim()
      end
   end
   return t
end

function test_ring_params(iface)
   local o = ring_params(iface)
   for i,v in pairs(o) do
      print(sf("%s %s",i,v))
   end
end

-- test_offloads(IFACE)
-- test_ring_params(IFACE)

-- Placeholder, not implemented.

function iface_get(iface)
end

-- return number of hardware queues found
-- Writes env.MAX_HWQ_BYTES to each tx-N byte_queue_limits/limit_max
-- file, counting up from tx-0 until a write fails.

local function bql_setup(iface)
   local c = 0
   while spewf(sf("/sys/class/net/%s/queues/tx-%d/byte_queue_limits/limit_max",iface,c),
               env.MAX_HWQ_BYTES) ~= nil do
      c = c + 1
   end
   return c
end

-- Maybe better done with ethtool

local function speed_set(iface,speed)
   return spewf(sf("/sys/class/net/%s/speed",iface),speed)
end

local function speed_get(iface)
   return slurpf(sf("/sys/class/net/%s/speed",iface))
end

-- Both branches currently yield the same quantum.

local function rate_quantum(rate)
   if rate < 10000 then return(1500) end
   return(1500)
end

-- Doing this as a lookup table hurt lua
-- FIXME: Not clear how to reset to advertising all
-- Not clear how to reset this parameter from
-- userspace to autonegotiate
-- What to do with non-sensical values that you
-- get before an interface is live?
-- Maybe use ethtool speed option?

-- Map a speed to the mask string passed to "ethtool -s ... advertise".

local function advertise_speed(s)
   local x = "0x000"
   if s < 10001 then x = "0x1000" end
   if s < 1001 then x = "0x020" end
   if s < 101 then x = "0x008" end
   if s < 11 then x = "0x002" end
   if s == 0 then x = "0x000" end
   return x
end

-- TSO does terrible things to the scheduler
-- GSO does as well
-- UFO is not a feature of most devices

-- In the long run I think we want to disable
-- TSO and GSO entirely below 100Mbit.
-- I'd argue for the same for gigE, too, for desktops

-- Prepare an ethernet interface: optionally restrict the advertised
-- speed and shrink the TX ring, apply the BQL limit, and turn off
-- GSO/TSO/UFO/GRO unless env.TSO is set.
-- Returns the number of hardware queues found by bql_setup.

local function ethernet_setup(iface)
   local o = offloads(iface)   -- result currently unused
   local tx = ring_params(iface)

-- for testing, limit ethernet to SPEED

   if env.FORCE_SPEED then
      ethtool("-s %s advertise %s",iface,advertise_speed(env.FORCE_SPEED))
   end
   if env.FORCE_RING then
      -- ring_params returns its values as strings, so convert before
      -- comparing; comparing a number with a string raises an error.
      local wanted = tonumber(env.FORCE_RING)
      local current = tonumber(tx['cur_TX'])
      if wanted ~= nil and current ~= nil and wanted < current then
         ethtool("-G %s tx %d",iface,wanted)
      end
   end
   local queues = bql_setup(iface)
   if env.TSO == nil then
      ethtool("-K %s gso off",iface)
      ethtool("-K %s tso off",iface)
      ethtool("-K %s ufo off",iface)
      ethtool("-K %s gro off",iface)
   end
   return queues
end

-- Some TC helpers

-- rates are specified as kilobits (xkbits)
-- does this need to be integer?
-- FIXME the effect of rounding really isn't what we want

-- Render a rate given in kbit as "<n>kbit", "<n>mbit" or "<n>gbit".

local function r2s(rate)
   if round(rate) < 1000 then return round(rate) .. "kbit" end
   local r = rate/1000
   if round(r) < 1000 then return round(r) .. "mbit" end
   r = r/1000
   return round(r) .. "gbit"
end

-- print(r2s(10))
-- print(r2s(1001))
-- print(r2s(1050)) -- yea, rounding bad
-- print(r2s(999999.4))
-- print(r2s(10000000.6))
-- print(r2s(1000000))

-- TC tends to be repetitive and hard to read
-- So these functions shorten things considerably by doing
-- the "{class,qdisc,filter} add dev %s" for us
-- It also means lua keeps less unique strings around.

-- Constructing something that was ** reversible **
-- and cleaner to express would be better than this

local castring=sf("class add dev %s ", env.IFACE)
local fastring=sf("filter add dev %s ",env.IFACE)
local qastring=sf("qdisc add dev %s ", env.IFACE)

-- Similarly, the 'parent' string is often used

local capstring=sf("class add dev %s parent ", env.IFACE)
local fapstring=sf("filter add dev %s parent ",env.IFACE)
local qapstring=sf("qdisc add dev %s parent ", env.IFACE)

-- Each helper formats its arguments with string.format and writes one
-- command line to "tc".
-- NOTE(review): tc is not defined in this part of the file; presumably
-- it is the batch-mode pipe returned by opentc()/resettc() - confirm.

local function ca(...)
   return tc:write(castring,sf(...),"\n")
end

local function cap(...)
   return tc:write(capstring,sf(...),"\n")
end

local function fa(...)
   return tc:write(fastring,sf(...),"\n")
end

local function fap(...)
   return tc:write(fapstring,sf(...),"\n")
end

local function qa(...)
   return tc:write(qastring,sf(...),"\n")
end

local function qap(...)
   return tc:write(qapstring,sf(...),"\n")
end

-- FIXME sanely calculate htb rate, overhead, etc, etc
-- NOTE(review): est and quantum are not defined anywhere in this part
-- of the file; if they are not set elsewhere this call fails in
-- string.format - confirm before using.

local function hap(parent,child,rate,str)
   cap("%x: classid %x:%x %s htb rate %skibit mtu %d mpu 64 quantum %d %s",
       parent,parent,child,est,rate,env.MTU,quantum,str)
end

-- Open a write pipe to "env.TC env.TCARG" (batch mode by default).

local function opentc()
   return popen(sf("%s %s",env.TC, env.TCARG),'w')
end

-- When env.INGRESS is set, load sch_ingress and delete any existing
-- ingress qdisc on IFACE.

function ingress()
   if env.INGRESS then
      kernel_prereqs({"sch_ingress"})
      local tc = opentc()
      tc:write(sf("qdisc del dev %s ingress\n",IFACE))
      tc:close()
   end
end

-- Delete the root qdisc on IFACE, then return a fresh tc pipe.

function resettc()
   local tc=opentc()
   tc:write(sf("qdisc del dev %s root\n",IFACE))
   tc:close()
   return opentc()
end

-- QFQ: Create a bin attached to the parent class

local function cb(base,bin,disc)
   cap("%x classid %x:%x qfq",base,base,bin)
   qap("%x:%x %s",base,bin,disc)
end

-- FIXME: It would be nice to have a cleaner way to match all multicast

local function fa_mcast(parent)
   fap("%x: protocol ip prio 5 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST)
   fap("%x: protocol ipv6 prio 6 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST)
   fap("%x: protocol arp prio 7 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST)
end

-- Catch-all filter sending everything else to the DEFAULTB bin.

local function fa_defb(parent)
   fap("%x: protocol all prio 999 u32 match ip protocol 0 0x00 flowid %x:%x",parent,parent,DEFAULTB)
end

-- FIXME: This needs a correct hash for natted sources when NAT=y and ipv6
-- handle 3 repeated bad?
-- Install the flow-hash filters that spread traffic over env.BINS bins
-- (keys proto-dst plus rxhash, or nfct-src for the NAT case), then call
-- the pingopt hook with the parent and the default bin.

local function fa_bins(parent)
   local bins = env.BINS
   if not env.NAT then
      fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,bins)
   else
      fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,bins)
      fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,bins)
   end
   local parent_id = sf("%x:",parent)
   local default_bin = sf("%x:%x",parent,DEFAULTB)
   pingopt(parent_id,default_bin)
-- At one point I was trying to handle ipv6 separately
-- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
end

-- Same as fa_bins, but hashing on the source address only (src, or
-- nfct-src for the NAT case).

local function faip_bins(parent)
   local bins = env.BINS
   if not env.NAT then
      fap("%x: handle 3 protocol all prio 97 flow hash keys src divisor %d",parent,bins)
   else
      fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys src divisor %d",parent,bins)
      fap("%x: handle 4 protocol all prio 97 flow hash keys nfct-src divisor %d",parent,bins)
   end
   local parent_id = sf("%x:",parent)
   local default_bin = sf("%x:%x",parent,DEFAULTB)
   pingopt(parent_id,default_bin)
-- At one point I was trying to handle ipv6 separately
-- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS)
end

-- Create qfq classes 0..env.BINS under parent, each with an
-- env.BIGDISC qdisc attached.

local function q_bins(parent)
   local last_bin = env.BINS
   local bin = 0
   while bin <= last_bin do
      cap("%x: classid %x:%x qfq",parent,parent,bin)
      qap("%x:%x %s",parent,bin,env.BIGDISC)
      bin = bin + 1
   end
end

-- We can do simple per-stream load balancing across multiple hardware
-- queues thusly. This assumes your IPv6 isn't natted....
-- Flow-hash filters with the divisor set to the number of hardware
-- queues.

local function mqprio_bins(parent,queues)
   if env.NAT then
      fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,queues)
      fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,queues)
   else
      fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,queues)
   end
-- At one point I was trying to handle ipv6 separately
-- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,BINS)
end

-- Eric's Enhanced SFQ

-- FIXME: originally hard coded for 200Mbit
-- I'm going to argue that depth, flows, speed all need to be
-- done via something kleinrock-like. The problem is that
-- we don't know the delay without hitting the next hop
-- And we can't get the next hop until after the interface is
-- up. And even then we can only measure RTT, which is off
-- by a factor of three on the two different systems I've looked at

-- FIXME: I don't think I should be measuring speed in megabits

-- Eric's original code had a mtu of 40000, which I assume is needed
-- for TSO/GSO to work. These quantums are way too large for lower
-- speeds

--[[ FIXME: Add ADSL support
My crude hack was adding:
tc_stab_string="stab overhead 18 mtu 2048 mpu 53 linklayer atm"
to generate.sh; and then modifying the addition of root disc like the following
tc qdisc add dev $dev root handle 1: ${tc_stab_string} hfsc default ${class_default}0
Then I only needed to reduce the uplink and downlink speed marginally (5%) and got good and stable ping latencies even in the lieu of massive uploads and opening 100 browser tabs at the same time. Without the stab option I had to reduce nominal speeds to around 65%-70% of the line rate and still got worse ping latencies than with the stab option.
--]]

-- Fixed 200Mbit htb + sfq; speed and flows are currently ignored.

local function htb_sfq(speed,flows)
   qa("root handle 1: est 1sec 8sec htb default 1")
   cap("1: classid 1:1 est 1sec 8sec htb rate 200Mbit mtu 1500 quantum 80000")
   qap("1:1 handle 10: est 1sec 8sec sfq limit 2000 depth 10 headdrop flows 1000 divisor 16384")
end

-- local function htb_sfq_red(speed,flows)
-- hard coded, no math, reasonable for ~4Mbit

local function htb_sfq_red(queues)
   qa("root handle 1: est 1sec 8sec htb default 1")
   cap("1: classid 1:1 est 1sec 8sec htb rate %dkibit mtu 1500 quantum 1514",env.UPLINK)
   qap("1:1 handle 10: est 1sec 4sec sfq limit 200 headdrop flows 500 divisor 16384 redflowlimit 24000 min 3000 max 18000 depth 16 probability 0.12 ecn harddrop")
end

-- This simplified model is intended to work best with the out of tree
-- sfq that has enqueuing to head. It merely looks for background traffic
-- and tosses that into a separate bucket.

-- Rate and ceil
-- NOTE(review): BKTRAFFIC, BKTRAFFIC_MAX and MAXTRAFFIC are assigned as
-- globals and MAXTRAFFIC is not used here; left unchanged in case other
-- parts of the file read them.

local function htb_sfq_red2(queues)
   BKTRAFFIC=rbound(env.UPLINK *.05, 32, env.UPLINK*.90)
   BKTRAFFIC_MAX=rbound(env.UPLINK*.85,32,env.UPLINK*.90)
   MAXTRAFFIC=round(env.UPLINK-BKTRAFFIC)
   qa("root handle 1:0 est 1sec 8sec htb default 1")
   cap("1:0 classid 10:1 est 1sec 8sec htb prio 1 rate %dkbps ceil %dkbps mtu 1500 quantum 1514",
       env.UPLINK, env.UPLINK)
   cap("10:1 classid 11:1 est 1sec 8sec htb prio 2 rate %dkbps ceil %dkbps mtu 1500 quantum 1514",
       BKTRAFFIC,BKTRAFFIC_MAX)
   cap("10:1 classid 12:1 est 1sec 8sec htb prio 2 rate %dkbps ceil %dkbps mtu 1500 quantum 1514",
       BKTRAFFIC,BKTRAFFIC_MAX)
   qap("10:1 handle 12:0 est 1sec 4sec sfq limit 300 headdrop flows 2000 divisor 16384 redflowlimit 24000 min 3000 max 18000 depth 43 probability 0.12 ecn harddrop")
   qap("11:1 handle 20:0 est 1sec 4sec sfq limit 300 headdrop flows 2000 divisor 16384 redflowlimit 64000 min 3000 max 18000 depth 43 probability 0.12 ecn harddrop")
   fap("1:1 prio 10 u32 match ip tos 0x08 0xff flowid 11:1")
--   fap("1:0 prio 11 u32 match ipv6 tos 0x08 0xfc flowid 1:2")

end

local function htb_sfq_red_cero(queues)
   qa("root handle 1: est 1sec 8sec htb default 1")
   cap("1: classid 1:1 est 1sec 8sec htb rate 4Mbit mtu 1500 quantum 4500")
   qap("1:1 handle 10: est 1sec 4sec sfq limit 120 perturb 60000")
end

-- Enhanced sfq under parent (a string) with the given handle and flow
-- count; speed is currently ignored. The format string and its
-- arguments go straight to qap, which formats them once (the old code
-- formatted the string and then ran the result through the formatter
-- again).

local function efq(parent, handle, speed, flows)
   qap("%s handle %x: est 1sec 8sec sfq limit 2000 depth 12 headdrop flows %d divisor 16384",
       parent,handle,flows)
end

-- As efq, with the RED parameters added.

local function efqr(parent, handle, speed, flows)
   qap("%s handle %x: est 1sec 4sec sfq limit 200 quantum 3028 perturb 600 headdrop flows %d depth 24 divisor 16384 redflowlimit 40000 min 4500 max 18000 probability 0.20 ecn",parent,handle,flows)
end

-- Run iptables / ip6tables with the given argument string.
-- NOTE(review): the binaries are looked up on PATH; env.IPTABLES and
-- env.IP6TABLES are not consulted here.

function iptables4(...)
   exec(sf("iptables %s",...))
end

function iptables6(...)
   exec(sf("ip6tables %s",...))
end

-- Apply the same rule to both IPv4 and IPv6.

function iptables(...)
   iptables4(...)
   iptables6(...)
end

-- Flush, delete and re-create chain t.chain in table t.table.

function recreate_filter(t)
   assert(t.chain, "ERROR: chain parameter is missing!")
   assert(t.table, "ERROR: table parameter is missing!")
   iptables(sf("-t %s -F %s", t.table, t.chain))
   iptables(sf("-t %s -X %s", t.table,t.chain))
   iptables(sf("-t %s -N %s", t.table,t.chain))
end

-- Builds up a list of iptables commands to flush the qos_* chains,
-- remove rules referring to them, then delete them

-- Print rules in the mangle table, like iptables-save
-- Find rules for the qos_* chains
-- Exclude rules in qos_* chains (inter-qos_* refs)
-- Replace -N with -X and hold, with -F and print
-- Replace -A with -D
-- Print held lines at the end (note leading newline)
-- Make into proper iptables calls
-- Note: awkward in previous call due to hold space usage
-- Magic borrowed from openwrt generate.sh

-- The grep alternation must reach the shell as \| , so the backslash is
-- doubled in the Lua string: a bare "\|" is an invalid escape on Lua
-- 5.2+ and collapses to a plain "|" on 5.1, which grep treats as a
-- literal character.
-- NOTE(review): the pipeline prints the generated commands; nothing
-- here executes them - confirm that is intended.

function iptables_aqm_clean()
   local tstr="-t mangle -S | grep '^-N qos_\\|-j qos_' | grep -v '^-A qos_' | sed -e '/^-N/{s/^-N/-X/;H;s/^-X/-F/}' -e 's/^-A/-D/' -e '${p;g}' | sed -n -e 's/^./iptables -t mangle &/p'"
   iptables4(tstr)
   tstr="-t mangle -S | grep '^-N qos_\\|-j qos_' | grep -v '^-A qos_' | sed -e '/^-N/{s/^-N/-X/;H;s/^-X/-F/}' -e 's/^-A/-D/' -e '${p;g}' | sed -n -e 's/^./ip6tables -t mangle &/p'"
   iptables6(tstr)
end

-- ${iptrules:+${iptrules}${N}iptables -t mangle -A qos_${cg}_ct -j CONNMARK --save-mark --mask 0xff}
-- iptables -t mangle -A qos_${cg} -j CONNMARK --restore-mark --mask 0xff
-- iptables -t mangle -A qos_${cg} -m mark --mark 0/0xff -j qos_${cg}_ct
-- $pktrules
-- $up$N${down:+${down}$N}

FW_PREREQ = { "ipt_multiport", "ipt_CONNMARK" }
DL_PREREQ = { "cls_u32", "em_u32", "act_connmark", "act_mirred", "sch_ingress" }
OWRT_PREREQ = { "cls_fw", "sch_hfsc","sch_sfq", "sch_red" }

-- Format a rule and apply it to the mangle table (IPv4 and IPv6).

function iptm(...)
   iptables(sf("-t mangle %s", sf(...)))
end

-- Create the qos_<cg> and qos_<cg>_ct chains with their CONNMARK
-- save/restore rules.

function aqm_init(cg)
   iptm("-N qos_%s",cg)
   iptm("-N qos_%s_ct",cg)
   iptm("-A qos_%s_ct -j CONNMARK --save-mark --mask 0xff",cg)
   iptm("-A qos_%s -j CONNMARK --restore-mark --mask 0xff",cg)
   iptm("-A qos_%s -m mark --mark 0/0xff -j qos_%s_ct",cg,cg)
-- $pktrules
-- $up$N${down:+${down}$N}
end

-- Classify every non-unicast packet in chain into class.

local function mcast_classify(chain,class)
   iptables(sf("-t mangle -A %s -m pkttype ! --pkt-type unicast -j CLASSIFY --set-class %s",chain,class))
end

-- DSCP code point names and their numeric values.

local ds = { ["BE"]=0, ["AF11"]=10, ["AF12"]=12, ["AF13"]=14,
             ["AF21"]=18, ["AF22"]=20, ["AF23"]=22, ["AF31"]=26,
             ["AF32"]=28,["AF33"]=30, ["AF41"]=34, ["AF42"]=36,
             ["AF43"]=38, ["EF"]=46, ["CS1"]=8, ["CS2"]=16,
             ["CS3"]=24, ["CS4"]=32, ["CS5"]=40, ["CS6"]=48,
             ["CS7"]=56, ["BOFH"]=4, ["ANT"]=42, ["LB"]=63, ["P2P"]=9
          }

-- No matter what I try I get this wrong. You would think 1:1,2,3,4
-- was the right thing. Nope.
-- So 1:1 doesn't work. 1:10 doesn't work. Trying 1:101 etc because
-- that's a magic value and....
-- And IPv6 multicast is never matched.

local function mac80211e()

   local t = "-t mangle -A W80211e -m dscp --dscp %d -j CLASSIFY --set-class 0:%d -m comment --comment '%s'"
   local function f(...)
+ iptables(sf(t,...)) + end + + recreate_filter({table="mangle",chain="W80211e"}) + + iptables("-t mangle -A W80211e -j CLASSIFY --set-class 0:103 -m comment --comment 'Reclassify BE'") + f(ds.EF, 106,'Voice (EF)') + f(ds.CS6, 106,'Critical (VO)') + f(ds.ANT, 105,'Ants(VI)') + f(ds.BOFH,105,'Typing (VI)') + f(ds.AF41,105,'Net Radio(VI)') + f(ds.CS3, 105,'Video (VI)') + f(ds.CS1, 101,'Background (BK)') + f(ds.CS5, 101,'General Stuff (BK)') + f(ds.P2P, 101,'P2P (BK)') + f(ds.CS2, 101,'Background (BK)') + f(ds.AF33,101,'Background (AF33)') + mcast_classify("W80211e","0:103") +end + +-- Iptables wrappers that we need due to lack of filters. Maybe use a +-- DEBLOAT chain. It would be good to have a universal number to +-- reduce the number of match rules iptables -t mangle -o iface -I +-- POSTROUTING -m multicast ! unicast --classify 1:1 + +local function iptables_probe(iface,rule) +end + +local function iptables_remove(iface,rule) +end + +local function iptables_insert(iface,rule) + iptables(sf("-t mangle -o %s -A POSTROUTING -j %s",iface,rule)) +end + +-- Basic SFQ on wireless + +-- FIXME: We must get ALL multicast out of the other queues +-- and into the VO queue. Always. Somehow. + +-- It also makes sense to do EF into the VO queue and match the +-- default behavior inside of the MAC80211 code for scheduling +-- purposes. 

-- Placeholder for per-queue wireless filters (nothing is installed yet).
local function wireless_filters()
-- FIXME: We need filters to use the various queues
-- The only way to get them is to use iptables presently
-- and even that's not working
end

-- Install an mq root and attach `queuetype` to each of the four 802.11e
-- hardware queues (VO, VI, BE, BK handles come from file-level globals).
local function wireless_setup(queuetype)
   qa("handle 1 root mq")
   qap("1:1 handle %x %s",VO, queuetype)
   qap("1:2 handle %x %s ",VI, queuetype)
   qap("1:3 handle %x %s ",BE, queuetype)
   qap("1:4 handle %x %s",BK, queuetype)
   wireless_filters()
end


-- For every name in `qlens`, read <base>/<name> and rewrite it with the
-- configured env value when the file is readable and the value differs.
local function wireless_qlen_change(base)
   for _, name in ipairs(qlens) do
      local path = sf("%s/%s",base,name)
      local current = slurpf(path)
      -- The original compared against the misspelled global `nill`,
      -- which only worked because an undefined global is nil.
      -- NOTE(review): if slurpf() returns a string this never equals the
      -- numeric env value, so the file is always rewritten - confirm.
      if current ~= nil and current ~= env[name] then
         spewf(path,env[name])
      end
   end
end

-- Apply the configured queue lengths to the ath9k debugfs directories of
-- phy0..phy2.
local function wireless_qlen()
-- Sure we could inspect things here but whatever
   for _, phy in ipairs({ "phy0/ath9k", "phy1/ath9k", "phy2/ath9k" }) do
      wireless_qlen_change(sf("%s/%s",wireless_debug,phy))
   end
end

-- Low-latency variant of wireless_setup(): per-queue limits, ECN only on
-- VI/BE, and env.CODEL_LL_QUANTUM as the quantum for VI/BE.
local function wireless_setup_ll(queuetype)
   qa("handle 1 root mq")
   qap("1:1 handle %x %s limit 800 noecn target 10ms quantum 500",VO, queuetype)
   qap("1:2 handle %x %s limit 800 ecn quantum %d",VI, queuetype, env.CODEL_LL_QUANTUM)
   qap("1:3 handle %x %s limit 1000 ecn quantum %d",BE, queuetype, env.CODEL_LL_QUANTUM)
   qap("1:4 handle %x %s limit 1000 noecn ",BK, queuetype)
   wireless_filters()
end

-- ingress model
-- this is why I wanted my tc things
-- to be objects, e.g:
-- dev = tc.new()
-- dev:qa ifb:qa

-- Redirect ingress traffic to ifb0 and shape it there with CBQ + SFQ/RED
-- at env.DOWNLINK kbit. Does nothing when env.DOWNLINK is unset.
local function sfqred_ingress()
   -- RATE, IFB and ALLOT are deliberately left global: ingress() is
   -- defined elsewhere and may read them (presumably IFB) - verify before
   -- localizing.
   RATE=env.DOWNLINK
   if RATE == nil then return end
   kernel_prereqs({"sch_cbq","act_mirred","cls_fw"})
   IFB="ifb0"
   ALLOT=1524 -- not needed?
   ingress()
   qa("ingress")
   fap("ffff: protocol all u32 match u32 0 0 flowid 1:1 action mirred egress redirect dev %s",IFB)
   -- Lets say our NIC is 100Mbit
   tc:write(sf("qdisc add dev %s root handle 1: cbq avpkt 1000 rate 100Mbit bandwidth 100Mbit\n",IFB))
-- FIXME: allocate bandwidth from underlying interface
   tc:write(sf("class add dev %s parent 1: classid 1:1 cbq allot 1524 mpu 64 rate 100Mbit prio 1 bandwidth 100Mbit maxburst 150 avpkt 1500 bounded\n", IFB))

-- Class for traffic coming from Internet : limited to X Mbits

   tc:write(sf("class add dev %s parent 1:1 classid 1:11 cbq allot %d mpu 64 rate %dkbit bandwidth %dkbit maxburst 80 minburst 40 prio 2 avpkt 1400 bounded\n", IFB, ALLOT, RATE, RATE))

   tc:write(sf("qdisc add dev %s parent 1:11 handle 11: sfq limit 300 quantum 1500 headdrop flows 2048 divisor 16384 redflowlimit 60000 min 9000 max 27000 probability 0.20 ecn harddrop\n", IFB))

-- Traffic from machines in our LAN : no limit

--for i,privnet in pairs(LOCALNETS) do
-- tc:write(sf("filter add dev %s parent 1: protocol ip prio 2 u32 match ip src %s flowid 1:1\n", IFB, privnet))
-- end

   tc:write(sf("filter add dev %s parent 1: protocol all prio 2 u32 match ip protocol 0 0x00 flowid 1:11\n", IFB))
end

-- Various models
-- Each wireless_* model below installs the named qdisc on all four
-- hardware queues; the *_ll variants use the low-latency setup.

local function wireless_efq_codel()
   wireless_setup("efq_codel limit 1000 quantum 1000")
end

local function wireless_efq_codel_ll()
   wireless_setup_ll("efq_codel")
end

local function wireless_nfq_codel()
   wireless_setup("nfq_codel limit 1000 quantum 1000")
end

local function wireless_nfq_codel_ll()
   wireless_setup_ll("nfq_codel")
end

local function wireless_fq_codel()
   wireless_setup("fq_codel limit 1000 quantum 1000")
end

local function wireless_fq_codel_ll()
   wireless_setup_ll("fq_codel")
end

local function wireless_codel()
   wireless_setup("codel limit 1000")
end

local function wireless_ns2_codel()
   wireless_setup("ns2_codel limit 1000")
end

local function wireless_sfq()
   wireless_setup("sfq limit 40 perturb 6000")
end

-- erics sfq and erics sfqred with
-- some arbitrary speeds and bandwidths (unused)
-- TiQ would be better

-- mq root with an enhanced SFQ (efq) per hardware queue.
local function wireless_efq()
   qa("handle 1 root mq")
   efq("1:1",VO,20,30)
   efq("1:2",VI,50,20)
   efq("1:3",BE,150,100)
   efq("1:4",BK,30,10)
   wireless_filters()
end

-- mq root with an SFQ+RED (efqr) per hardware queue.
local function wireless_efqr()
   qa("handle 1 root mq")
   efqr("1:1",VO,20,30)
   efqr("1:2",VI,50,20)
   efqr("1:3",BE,150,2000)
   efqr("1:4",BK,30,10)
   wireless_filters()
end

-- FIXME: add HTB rate limiter support for a hm gateway
-- What we want are various models expressed object orientedly
-- so we can tie them together eventually
-- This is not that. We ARE trying to get to where the numbering
-- schemes are consistent enough to tie everything together
-- sanely...

-- Populate a QFQ at `base`: multicast and default buckets, their
-- filters, then the hashed bins and the bin filter.
local function model_qfq_subdisc(base)
   cb(base,MULTICAST,env.MDISC)
   cb(base,DEFAULTB,env.NORMDISC)
   fa_defb(base)
   fa_mcast(base);
   q_bins(base);
   fa_bins(base);
end

-- Like model_qfq_subdisc(), but first overrides env.BIGDISC with a CHOKe
-- qdisc sized from env.UPLINK.
local function model_choke_subdisc(base)
   env.BIGDISC=sf("choke bandwidth %dkbit limit 24 min 4 max 12 probability .2 ecn",env.UPLINK)
   model_qfq_subdisc(base)
end

-- Same layout as model_qfq_subdisc(), but the bins are selected with
-- faip_bins() instead of fa_bins().
local function one_over_ip(base)
   cb(base,MULTICAST,env.MDISC)
   cb(base,DEFAULTB,env.NORMDISC)
   fa_defb(base)
   fa_mcast(base);
   q_bins(base);
   faip_bins(base);
end

-- DRR

-- DRR root with env.BINS hashed classes plus two extra classes
-- (BINS+1, BINS+2), each backed by a 16-packet pfifo_head_drop.
local function drr(queues)
   kernel_prereqs({"sch_drr"})
   qa("root handle 1: drr")
   cap("1: classid 1:%x drr",env.BINS+1)
   cap("1: classid 1:%x drr",env.BINS+2)
   qap("1:%x pfifo_head_drop limit 16",env.BINS+1)
   qap("1:%x pfifo_head_drop limit 16",env.BINS+2)
   fa_defb(1)
   fa_mcast(1)
   for i=1,env.BINS do
      cap("1: classid 1:%x drr",i)
      qap("1:%x pfifo_head_drop limit 16",i)
   end
   fap("%x: handle 10 protocol all prio 10 flow hash keys proto-src,dst divisor %d perturb 10", 1, env.BINS);
end

-- Choke

-- Root CHOKe qdisc sized from env.UPLINK; no-op when UPLINK is unset.
local function choke(queues)
   kernel_prereqs({"sch_choke"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      -- quantum/est/mtu are computed but not used by the rule below.
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU
-- local limit,min,max = chokelimit(up,env.DOWNLINK)
      qa("root handle 1: choke bandwidth %dkbit limit 32 min 4 max 12 probability .2 ecn",
         up)
   end

end

-- hap

-- Using the same parameters as in Section V, we can estimate , queue
-- size oscillates the minimal N to be 8.08. When packets, constantly
-- turning CHOKe on and off, around as shown in Fig. 9 (compare with
-- Fig. 8). When is small, the equilibrium model in Section II no
-- longer holds. The same phenomenon is observed when increases (with
-- fixed ). The lower bound on dropping probability when CHOKe is
-- active, , eventually prevents TCP flows from making full use of the
-- available capacity. A positive effect is that the queue length is
-- controlled to stay around 20. We have also simulated with more th


-- HTB shaped to env.UPLINK with a CHOKe leaf; no-op when UPLINK is unset.
local function htb_choke(queues)
   kernel_prereqs({"sch_choke", "sch_htb"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU
      qa("root handle 1: %s htb default 1", est)
      cap("1: classid 1:1 %s htb burst 64 rate %dkibit mtu 1500 mpu 64", est, up)
-- qap("1:1 handle 10: choke bandwidth %dkbit limit 32 min 4 max 16 probability .2 ecn",
-- up)
-- at 4Mbit, choke will keep 2 .. 8 packets for min, max if
-- you just set the bandwidth
-- at 40mbit choke will keep 4..12
-- from the doc, this is bytes. From the code, this turns pkts into avpkts??
      qap("1:1 handle 10: choke bandwidth %dkbit limit 24 min 2 max 12 probability .2 ecn", up)
   end
end

-- SFB

-- Plain SFB root qdisc.
local function sfb(queues)
   kernel_prereqs({"sch_sfb"})
   qa("root handle 1: sfb")
end

-- HTB shaped to env.UPLINK with an SFB leaf; no-op when UPLINK is unset.
local function htb_sfb(queues)
   kernel_prereqs({"sch_sfb","sch_htb"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU
      qa("root handle 1: %s htb default 1", est)
      cap("1: classid 1:1 %s htb burst 64 rate %dkibit mtu 1500 mpu 64 quantum 1500", est, up)
      qap("1:1 handle 10: sfb limit 40")
   end
end

-- Root TBF at env.UPLINK kbit; no-op when UPLINK is unset.
local function tbf(queues)
   kernel_prereqs({"sch_tbf"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU

      qa("root handle 1: %s tbf rate %dkbit burst 5k latency 5ms minburst 1540", est, up)
   end
end

-- HTB -> QFQ -> SFQ/RED bins. Overrides env.BIGDISC with fixed SFQ/RED
-- parameters before building the bins with one_over_ip().
local function htb_qfq_sfqred(queues)
   kernel_prereqs({"sch_sfq","sch_qfq"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU

      qa("root handle 1: est 1sec 2sec htb default 1")
      cap("1: classid 1:1 est 1sec 2sec htb burst 64 rate %dkibit mtu 1500 mpu 64 quantum 1500",up)
      qap("1:1 handle %x qfq",10)
      -- FIXME, do the calculation for the bandwidth
      env.BIGDISC="sfq limit 80 headdrop quantum 1500 flows 200 divisor 16384 redflowlimit 9000 min 1500 max 4500 depth 12 probability 0.2 ecn harddrop"
-- env.BIGDISC="sfq limit 80"

      one_over_ip(10)
   end
end

-- HTB -> QFQ with the default bin qdiscs (model_qfq_subdisc).
local function htb_qfq_drop_head(queues)
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU

      qa("root handle 1: est 1sec 8sec htb default 1")
      cap("1: classid 1:1 est 1sec 8sec htb rate %dkbit mtu 1500 quantum 1500",up)
      qap("1:1 handle %x qfq",10)
      model_qfq_subdisc(10)

   end
end

-- HTB -> QFQ with CHOKe bins (model_choke_subdisc).
local function htb_qfq_choke(queues)
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU

      qa("root handle 1: est 1sec 8sec htb default 1")
      cap("1: classid 1:1 est 1sec 8sec htb rate %dkbit mtu 1500 quantum 1500",up)
      qap("1:1 handle %x qfq",10)
      model_choke_subdisc(10)

   end
end

-- FIXME: Finish this up

-- Stub: currently identical to model_qfq_subdisc().
local function model_qfq_ared(base)
   return model_qfq_subdisc(base)
end

-- Stub: currently identical to model_qfq_subdisc().
local function model_qfq_red(base)
   return model_qfq_subdisc(base)
end

-- Attach a plain SFQ under parent `base`.
local function model_sfq(base)
   qa("parent %x sfq",base)
end

-- Wireless devices are multi-queued - BUT the hardware
-- enforces differences in behavior vs the queues
-- (actually hostapd does that)
-- FIXME: get a grip on lua iterators

-- QFQ on every hardware queue, each populated by model_qfq_subdisc().
local function wireless_qfq()
   wireless_setup("qfq")
   for _, handle in ipairs(WQUEUES) do
      model_qfq_subdisc(handle)
   end
end

-- QFQ on every hardware queue, each populated by model_qfq_ared().
local function wireless_qfqr()
   wireless_setup("qfq")
   for _, handle in ipairs(WQUEUES) do
      model_qfq_ared(handle)
   end
end

-- FIXME: just stubs for now

local function wireless_ared()
   qa("handle 1 root mq")
   for _, handle in ipairs(WQUEUES) do
      model_qfq_ared(handle)
   end
   wireless_filters()
end

-- FIXME: just stubs for now

local function wireless_red()
   qa("handle 1 root mq")
   for _, handle in ipairs(WQUEUES) do
      model_qfq_red(handle)
   end
   wireless_filters()
end

-- FIXME - mqprio might not be available
-- FIXME - rethink multi-queue idea

-- Root QFQ (handle 10) populated by model_qfq_subdisc(). The original
-- branched on `queues > 1` but both branches issued the same command, so
-- the branch (and the leaked global `c`) is dropped.
local function ethernet_qfq(queues)
   qa("handle %x root qfq",10)
   model_qfq_subdisc(10)
end

-- The ethernet_* models below each install a single root qdisc with
-- handle 10; `queues` is accepted for a uniform callback signature.

local function ethernet_efq_codel(queues)
   qa("handle %x root efq_codel limit 1000 ",10)
end

local function ethernet_efq_codel_ll(queues)
   qa("handle %x root efq_codel limit 1000 quantum 1000 ",10)
end

local function ethernet_nfq_codel(queues)
   qa("handle %x root nfq_codel limit 1000 ",10)
end

local function ethernet_nfq_codel_ll(queues)
   qa("handle %x root nfq_codel limit 1000 quantum 1000 ",10)
end

local function ethernet_fq_codel(queues)
   qa("handle %x root fq_codel",10)
end

local function ethernet_fq_codel_ll(queues)
   qa("handle %x root fq_codel limit 1000 quantum 1000 ",10)
end

local function ethernet_codel(queues)
   qa("handle %x root codel",10)
end

local function ethernet_ns2_codel(queues)
   qa("handle %x root ns2_codel",10)
end

local function ethernet_sfq(queues)
   qa("handle %x root sfq",10)
end

local function ethernet_efq(queues)
   qa("root handle %x est 1sec 8sec sfq limit 2000 depth 24 headdrop flows %d divisor 16384",10, 150)
end

local function ethernet_efqr(queues)
-- FIXME, we can do sane things with speed here
   qa("root handle %x: est 1sec 4sec sfq limit 300 depth 20 headdrop perturb 60000 flows %d divisor 16384 redflowlimit 32000 min 4500 max 18000 probability 0.20 ecn harddrop",10,2000)
end

-- FIXME: just stubs for now

local function ethernet_ared(queues)
   qa("handle %x root red",10)
end

local function ethernet_red(queues)
   qa("handle %x root red",10)
end

-- I don't know when a good time to have a larger quantum would be good


-- Proto all makes more sense maybe
-- Add an fw filter on `parent` that sends packets whose firewall mark
-- matches v/0xff to `class`, at preference `pref`.
local function fw_fap(parent, class, v, pref)
   local mask = 0xff
-- tc filter add dev ge00 parent 1: prio 4 protocol ip handle 4/0xff fw flowid 1:40

-- fap("%s protocol ip pref %d fw 0x%x/0x%x classid %s", parent,pref, v, mask, class )
-- fap("%s protocol ipv6 pref %d fw 0x%x/0x%x classid %s", parent,pref, v, mask, class
   fap("%s protocol all pref %d handle %x/0x%x fw flowid %s", parent, pref, v, mask, class )
end

-- return min,
-- max

-- Placeholder: intended to derive RED min/max from the uplink rate.
local function uplink_to_red(uplink)

end

-- mq root with an SFQ+RED qdisc on each of the four hardware queues.
local function wireless_sfqr(queues)
   -- local quantum = rate_quantum(up) -- convert to number
   local quantum = rate_quantum(4000)
   local mtu = env.MTU
-- local mtu = 1500

-- trying to find an operating point that does useful stuff on
-- wireless yet lets packet agg work better


   -- Attach the SFQ+RED leaf under 1:<parent>; prio/mark are unused
   -- while the fw_fap() call below stays commented out.
   local function red2(parent,handle,prio,mark)
      qap("1:%x handle %x: sfq limit 120 headdrop perturb 60000 flows %d divisor 16384 quantum 3000 depth 24 redflowlimit 60000 min 18000 max 50000 probability 0.20 ecn harddrop", parent, handle, 2000)
-- qap("1:%x handle %x: sfq limit 120 headdrop perturb 60000 flows %d divisor 16384 quantum 4500 depth 24 redflowlimit 30000 min 6000 max 18000 probability 0.20 ecn harddrop", parent, handle, 2000)
-- fw_fap("1:",sf("1:%x",handle),prio,mark)
   end

   qa("root handle 1: mq ")
   -- The original called red(), which is not defined in this scope (the
   -- only helper declared here is red2), so the calls would fail.
   red2(1,10,1,1)
   red2(2,20,2,2)
   red2(3,30,3,3)
   red2(4,40,4,4)

end

-- Four HTB classes, each at the full env.UPLINK rate, with an SFQ+RED
-- leaf and an fw-mark filter per class. Optionally adds ingress shaping
-- when both env.DOWNLINK and env.INGRESS are set. No-op without UPLINK.
local function fourtier(queues)
   kernel_prereqs({"sch_sfq",
                   "ifb",
                   "act_mirred"})
   if env.UPLINK ~= nil then
      local up=env.UPLINK
      local quantum = rate_quantum(up)
      local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX)
      local mtu = env.MTU

-- just straight sfq

      -- Alternative leaf (currently not called): plain SFQ.
      local function red4(parent,handle,prio,mark)
         cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d",
             parent, est, up, mtu, quantum)
         qap("1:%x handle %x: %s sfq limit 200",
             parent, handle, est)
         fw_fap("1:",sf("1:%x",handle),prio,mark)
      end

-- Hammered down on the defaults

      -- Leaf actually used below.
      local function red(parent,handle,prio,mark)
         cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d",
             parent, est, up, mtu, quantum)
         qap("1:%x handle %x: %s sfq limit 200 headdrop quantum 1524 perturb 60000 flows %d divisor 16384 depth 24 redflowlimit 9000 min 1500 max 4500 probability 0.20 ecn harddrop",
             parent, handle, est, 2000)
         fw_fap("1:",sf("1:%x",handle),prio,mark)
      end

-- still trying to find a useful operating point

      -- Alternative leaf (currently not called): larger RED thresholds.
      local function red2(parent,handle,prio,mark)
         cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d",
             parent, est, up, mtu, quantum)
         qap("1:%x handle %x: %s sfq limit 200 headdrop quantum 1524 perturb 60000 flows %d divisor 16384 depth 24 redflowlimit 40000 min 4500 max 9000 probability 0.20 ecn harddrop",
             parent, handle, est, 2000)
         fw_fap("1:",sf("1:%x",handle),prio,mark)
      end

      qa("root handle 1: %s htb default 1", est)
      red(1,10,1,1)
      red(2,20,2,2)
      red(3,30,3,3)
      red(4,40,4,4)
      if env.DOWNLINK ~= nil and env.INGRESS ~= nil then sfqred_ingress() end
   end
end

-- Openwrt emulation
-- The current openwrt shaper uses a combination of
-- HFSC, SFQ, and RED

-- Pipe `cstr` through /usr/lib/aqm/tcrules.awk (when that file exists)
-- with the interface name and line speed passed as awk variables.
function tcrules(cstr,rate)
   local dir="/usr/lib/aqm"
   -- The original called sf("%s/tcrules.awk") without the directory
   -- argument, which makes the format call fail.
   if file(sf("%s/tcrules.awk",dir)) then
      spewc(sf("awk -v device=\"%s\" -v linespeed=\"%s\" -f %s/tcrules.awk",env.IFACE,rate,dir), cstr)
   end
end

-- Incomplete OpenWrt-style HFSC shaper; reports "Not complete yet".
function oopenwrt(queues)
   if env.UPLINK == nil or env.DOWNLINK == nil then
      usage("UPLINK and DOWNLINK environment variables are required")
   end
   local ul = env.UPLINK
   qa("root handle 1: hfsc default 3")
   cap("1: classid 1:1 hfsc sc rate %dkbit ul rate %dkbit",ul,ul)
   usage("Not complete yet")
   -- NOTE(review): env.DOWNLOAD is not among the documented parameters
   -- (DOWNLINK is), and ifbdev/n are not defined in the visible source,
   -- so this branch looks unreachable/unfinished. Left as written
   -- because enabling it would fail on the undefined names.
   if env.DOWNLOAD then
      qa("ingress")
      fap("ffff: protocol ip prio 1 u32 match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb%d",ifbdev,n)
   end
end

-- Wondershaper emulation

-- Callback used for models that have no wireless implementation.
local function unsupported_shaper(queues)
   usage("Shaper not supported for wireless")
end

-- The wondershaper
-- This is improved over the original wondershaper with
-- Correct ECN support
-- FIXME: IPv6 support

-- low priority source netmasks
-- NOPRIOHOSTSRC=
-- low priority destination netmasks
-- NOPRIOHOSTDST=
-- low priority source ports
-- NOPRIOPORTSRC=
-- low priority destination ports
-- NOPRIOPORTDST="21"

-- Wondershaper-style setup: HTB with three SFQ classes (interactive
-- 1:10, bulk 1:20, low priority 1:30) shaped to env.UPLINK, plus an
-- ingress policer at env.DOWNLINK. Both variables are required.
local function wshaper(queues)
   if env.UPLINK == nil or env.DOWNLINK == nil then
      usage("UPLINK and DOWNLINK environment variables are required")
   end

   local UPLINK = env.UPLINK
   local DOWNLINK = env.DOWNLINK
   local v6 = env.IPV6

   qa("root handle 1: htb default 20")

-- shape everything at UPLINK speed - this prevents huge queues in
-- your next hop gateway which destroy latency:

   cap("1: classid 1:1 htb rate %skbit burst 6k",UPLINK)
   cap("1:1 classid 1:10 htb rate %dkbit burst 6k prio 1",UPLINK)
   cap("1:1 classid 1:20 htb rate %dkbit burst 6k prio 2",round(9*UPLINK/10))
   cap("1:1 classid 1:30 htb rate %dkbit burst 6k prio 2",round(8*UPLINK/10))

-- all get Stochastic Fairness:
-- Note the perturb option is rather excessive now

   qap("1:10 handle 10: sfq perturb 10")
   qap("1:20 handle 20: sfq perturb 10")
   qap("1:30 handle 30: sfq perturb 10")

-- TOS Minimum Delay (ssh, NOT scp) in 1:10:

   fap("1:0 protocol ip prio 10 u32 match ip tos 0x10 0x%x flowid 1:10",env.ECNMASK)

-- ICMP (ip protocol 1) in the interactive class 1:10 so we
-- can do measurements & impress our friends:

   -- In the visible source pingopt is only defined when env.PINGOPT is
   -- set; guard the call so the shaper still loads without it.
   if pingopt then
      pingopt("1:0","1:10")
   end

-- This is just so wrong in so many cases:

-- To speed up downloads while an upload is going on, put ACK packets
-- in the interactive class:

   fap([[1: protocol ip prio 10 u32 \
      match ip protocol 6 0xff \
      match u8 0x05 0x0f at 0 \
      match u16 0x0000 0xffc0 at 2 \
      match u8 0x10 0xff at 33 flowid 1:10]])

-- There is no IPv6 support in old wondershaper
-- FIXME - figure out how to find acks in ipv6

-- some traffic however suffers a worse fate

   -- Send each space-separated entry of `args` to the low priority class
   -- 1:30, matching "ip <match> <entry> <h>".
   local function np(args,prio,match,h)
      for i,v in pairs(args:split(" ")) do
         -- Kept on one line: inside a quoted Lua string a backslash
         -- followed by a line break yields a bare newline (no backslash),
         -- which would split the tc command in two.
         fap("1: protocol ip prio %d u32 match ip %s %s %s flowid 1:30",prio,match,v, h)
         if v6 then
-- fap("1: protocol ipv6 prio %d u32 \
-- match ip %s %s %s flowid 1:30",prio,match,v, h)
         end
      end
   end

   if env.NOPRIOPORTDST ~= nil then
      np(env.NOPRIOPORTDST,14,"dport","0xffff")
   end

   if env.NOPRIOPORTSRC ~= nil then
      np(env.NOPRIOPORTSRC,15,"sport", "0xffff")
   end

   if env.NOPRIOHOSTSRC ~= nil then
      np(env.NOPRIOHOSTSRC,16,"src"," ")
   end

   if env.NOPRIOHOSTDST ~= nil then
      np(env.NOPRIOHOSTDST,17,"dst"," ")
   end

-- rest is 'non-interactive' ie 'bulk' and ends up in 1:20

   fap("1: protocol ip prio 18 u32 match ip dst 0.0.0.0/0 flowid 1:20")

-------------------- downlink --------------------------

-- slow downloads down to somewhat less than the real speed to prevent
-- queuing at our ISP. Tune to see how high you can set it. ISPs tend
-- to have *huge* queues to make sure big downloads are fast
--
-- attach ingress policer:

   qa("handle ffff: ingress")

-- filter *everything* to it (0.0.0.0/0), drop everything that's
-- coming in too fast:

   fap("ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst 10k drop flowid :1",DOWNLINK)

end

-- For reference, this is a slavish re-implementation of the original
-- wondershaper, bugs with ecn, and ipv6 included

-- The new SFQ should also improve it.

-- That said, it is seriously flawed and I'd
-- like to be able to fully model its behavior,
-- hence this implementation.

-- Original wondershaper behaviour: full 0xff TOS mask and no IPv6.
-- Note that this overwrites env.ECNMASK and env.IPV6 for the rest of
-- the run.
local function owshaper(queues)
   env.ECNMASK = 0xff -- original wshaper screws up on ecn
   env.IPV6 = false
   return wshaper(queues)
end

-- I have to think about the calculations for 100Mbit and below...

-- FIXME: Think on the architecture and models harder
-- first.
-- Need to also be able to stick HSFC, netem, or HTB
-- on top of this

-- QMODEL name -> setup function for wireless interfaces.
-- NOTE(review): wireless_ns2_codel_ll and wireless_wshaper are not
-- defined in the visible part of the file; if they are undefined the
-- entries evaluate to nil and those models report "AQM model not found".
WCALLBACKS = { ["qfq"] = wireless_qfq,
               ["qfqred"] = wireless_qfqr,
               ["red"] = wireless_red,
               ["ared"] = wireless_ared,
               ["sfq"] = wireless_sfq,
               ["efq"] = wireless_efq,
               ["sfqred"] = wireless_efqr,
               ["sfqr"] = wireless_sfqr,
               ["codel"] = wireless_codel,
               ["fq_codel"] = wireless_fq_codel,
               ["fq_codel_ll"] = wireless_fq_codel_ll,
               ["nfq_codel"] = wireless_nfq_codel,
               ["nfq_codel_ll"] = wireless_nfq_codel_ll,
               ["efq_codel"] = wireless_efq_codel,
               ["efq_codel_ll"] = wireless_efq_codel_ll,
               ["ns2_codel"] = wireless_ns2_codel,
               ["ns2_codel_ll"] = wireless_ns2_codel_ll,
               ["htb_sfq_red"] = unsupported_shaper,
               ["oopenwrt"] = unsupported_shaper,
               ["owshaper"] = unsupported_shaper,
               ["wshaper"] = wireless_wshaper,
               ["twotier"] = unsupported_shaper,
               ["fourtier"] = unsupported_shaper
}

-- QMODEL name -> setup function for ethernet interfaces.
-- NOTE(review): ethernet_qfqr and twotier are not defined in the visible
-- part of the file; confirm they exist or those entries are nil.
-- The original listed ["owshaper"] twice with the same value; the
-- duplicate is removed.
ECALLBACKS = { ["qfq"] = ethernet_qfq,
               ["qfqred"] = ethernet_qfqr,
               ["red"] = ethernet_red,
               ["ared"] = ethernet_ared,
               ["sfq"] = ethernet_sfq,
               ["codel"] = ethernet_codel,
               ["ns2_codel"] = ethernet_ns2_codel,
               ["fq_codel"] = ethernet_fq_codel,
               ["fq_codel_ll"] = ethernet_fq_codel_ll,
               ["nfq_codel"] = ethernet_nfq_codel,
               ["nfq_codel_ll"] = ethernet_nfq_codel_ll,
               ["efq_codel"] = ethernet_efq_codel,
               ["efq_codel_ll"] = ethernet_efq_codel_ll,
               ["efq"] = ethernet_efq,
               ["sfqred"] = ethernet_efqr,
               ["htb_qfq_sfqred"] = htb_qfq_sfqred,
               ["htb_qfq_drop_head"] = htb_qfq_drop_head,
               ["htb_sfq_red"] = htb_sfq_red,
               ["htb_sfq_red2"] = htb_sfq_red2,
               ["htb_sfq_red_cero"] = htb_sfq_red_cero,
               ["tbf"] = tbf,
               ["htb_sfb"] = htb_sfb,
               ["choke"] = choke,
               ["htb_choke"] = htb_choke,
               ["htb_qfq_choke"] = htb_qfq_choke,
               ["sfb"] = sfb,
               ["drr"] = drr,
               ["oopenwrt"] = oopenwrt,
               ["wshaper"] = wshaper,
               ["owshaper"] = owshaper,
               ["twotier"] = twotier,
               ["fourtier"] = fourtier,
}

-- couple other models - dsl, wshaper, etc, needed
-- pingopt and argv
processing too + +local function wireless(model) + print(model) + if WCALLBACKS[model] ~= nil then + wireless_qlen() + return WCALLBACKS[model]() + else + usage("AQM model not found") + end + return nil +end + +local function ethernet(model) + if ECALLBACKS[model] ~= nil then + return ECALLBACKS[model](ethernet_setup(IFACE)) + else + usage("AQM model not found") + end + return nil +end + +-- It's annoying to get deletion as an error +-- And confusing to the user + +if env.PINGOPT then + pingopt = function(parent,flow) + fap("%s protocol ip prio 10 u32 match ip protocol 1 0xff flowid %s",parent,flow) + if(env.IPV6) then + -- arguably we should only match echo and echo reply but... + fap("%s protocol ipv6 prio 10 u32 match ip protocol 1 0xff flowid %s",parent,flow) + end + end +end + +if env.QDEBUG then + env.TC="/bin/cat" + env.TCARG=" " + env.DEBLOATLOG="/tmp/debloat.log" +end + +-- FIXME - do something intelligent when faced with a bridge or vlan + +itype=interface_type(IFACE) + +if itype == 'wireless' or itype == 'ethernet' then + kernel_prereqs(PREREQS) + tc = resettc() + + if itype == 'wireless' then + wireless(env.QMODEL) +-- FIXME: you watch this code set the class, then not show up in tc +-- mac80211e() +-- iptables_insert(IFACE,"W80211e") + end + if itype == 'ethernet' then ethernet(env.QMODEL) end +end diff --git a/debloat.sh b/debloat.sh new file mode 100755 index 0000000..2c78a0b --- /dev/null +++ b/debloat.sh @@ -0,0 +1,127 @@ +#!/bin/bash +# debloat.sh - improves network latency by reducing excessive buffering +# and offloads on common devices and enabling fq_codel. +# Copyright 2012 M D Taht. Released into the public domain. + +# This script is presently targetted to go into +# /etc/network/ifup.d on debian derived systems + +LL=1 # go for lowest latency +ECN=1 # enable ECN +BQLLIMIT100=3000 # at speeds below 100Mbit, 2 big packets is enough +BQLLIMIT10=1514 # at speeds below 10Mbit, 1 big packet is enough. 
+ # Actually it would be nice to go to just one packet +QDISC=fq_codel # There are multiple variants of fq_codel in testing +FQ_LIMIT="" # the default 10000 packet limit mucks with slow start at speeds + # at 1Gbit and below. Somewhat arbitrary figures selected. + +[ -z "$IFACE" ] && echo error: $0 expects IFACE parameter in environment && exit 1 +[ -z `which ethtool` ] && echo error: ethtool is required && exit 1 +[ -z `which tc` ] && echo error: tc is required && exit 1 +# FIXME see if $QDISC is available. modprobe? + +# BUGS - need to detect bridges. +# - Need filter to distribute across mq ethernet devices +# - needs an "undebloat" script for ifdown to restore BQL autotuning + +S=/sys/class/net +FQ_OPTS="" +#FQ_OPTS="FLOWS 2048 TARGET 5ms" + +[ $LL -eq 1 ] && FQ_OPTS="$FQ_OPTS quantum 500" +[ $ECN -eq 1 ] && FQ_OPTS="$FQ_OPTS ecn" + +FLOW_KEYS="src,dst,proto,proto-src,proto-dst" +# For 5-tuple (flow) fairness when the same device is performing NAT +#FLOW_KEYS="nfct-src,nfct-dst,nfct-proto,nfct-proto-src,nfct-proto-dst" + + +# Offloads are evil in the quest for low latency +# And ethtool will abort if you attempt to turn off a +# nonexistent offload. + +et() { +( + ethtool -K $IFACE tso off + ethtool -K $IFACE gso off + ethtool -K $IFACE ufo off +# Presently unknown if gro/lro affect latency much + ethtool -K $IFACE gro off + ethtool -K $IFACE lro off +) 2> /dev/null +} + +# Wifi is special in that how the queues work is pre-defined +# to be voice, video, best effort and background + +wifi() { + tc qdisc add dev $IFACE handle 1 root mq + tc qdisc add dev $IFACE parent 1:1 $QDISC $FQ_OPTS noecn + tc qdisc add dev $IFACE parent 1:2 $QDISC $FQ_OPTS + tc qdisc add dev $IFACE parent 1:3 $QDISC $FQ_OPTS + tc qdisc add dev $IFACE parent 1:4 $QDISC $FQ_OPTS noecn +} + +# Hardware mq ethernet devs are special and need some sort of filter +# attached to actually use in most cases. FIXME. 
(see tg3)
+
+# Attach one $QDISC per hardware tx queue under an mq root, then add a
+# flow-hash filter over FLOW_KEYS with one bucket per queue.
+# Reads IFACE, S, QDISC, FQ_OPTS and FLOW_KEYS from the script's globals.
+mq() {
+    local I=1
+    local TXQ
+    tc qdisc add dev $IFACE handle 1 root mq
+
+    # Class minor numbers are written in hex, hence the printf
+    for TXQ in $S/$IFACE/queues/tx-*
+    do
+        tc qdisc add dev $IFACE parent 1:$(printf "%x" $I) $QDISC $FQ_OPTS
+        I=$(($I + 1))
+    done
+    I=$(($I - 1)) # back to the number of queues visited
+    tc filter add dev $IFACE prio 1 protocol ip parent 1: handle 100 \
+        flow hash keys ${FLOW_KEYS} divisor $I baseclass 1:1
+}
+
+# Single-queue case: one $QDISC at the root. FQ_LIMIT is either empty or a
+# complete "limit N" argument pair prepared by fix_speed.
+fq_codel() {
+    tc qdisc add dev $IFACE root $QDISC $FQ_OPTS $FQ_LIMIT
+}
+
+# Scale the fq_codel packet limit and the BQL ceiling to the link speed.
+#
+# Reads:  $S/$IFACE/speed, LL, BQLLIMIT100, BQLLIMIT10
+# Writes: FQ_LIMIT (always a full "limit N" pair, because fq_codel() passes
+#         it to tc verbatim) and, at 100Mbit and below, BQLLIMIT plus every
+#         tx queue's byte_queue_limits/limit_max.
+fix_speed() {
+# The redirection must sit inside the substitution to silence cat itself
+local SPEED=`cat $S/$IFACE/speed 2> /dev/null`
+local LIMIT_FILE
+
+# No readable speed attribute: leave the defaults alone
+[ -n "$SPEED" ] || return 0
+
+# 4294967295 is the all-ones "unknown" value; -1 is the same value printed
+# signed (NOTE(review): confirm which form the target kernels emit).
+# Neither may be mistaken for a sub-10Mbit link.
+case "$SPEED" in
+    4294967295|-1)
+        echo "no ethernet speed selected. debloat estimate will be WRONG"
+        return 0
+        ;;
+esac
+
+[ "$SPEED" -lt 1001 ] && FQ_LIMIT="limit 1200"
+if [ "$SPEED" -lt 101 ]
+then
+    [ $LL -eq 1 ] && et # for lowest latency disable offloads
+    BQLLIMIT=$BQLLIMIT100
+    FQ_LIMIT="limit 800"
+    [ "$SPEED" -lt 11 ] && BQLLIMIT=$BQLLIMIT10 && FQ_LIMIT="limit 400"
+    for LIMIT_FILE in $S/$IFACE/queues/tx-*/byte_queue_limits/limit_max
+    do
+        # An unmatched glob stays literal; skip it instead of failing
+        [ -w "$LIMIT_FILE" ] && echo $BQLLIMIT > "$LIMIT_FILE"
+    done
+fi
+return 0
+}
+
+# Pick the qdisc layout from the number of tx queues: more than one queue
+# means wifi (when the device has a phy80211 entry) or mq, otherwise a
+# single root qdisc.
+fix_queues() {
+local QUEUES=`ls -d $S/$IFACE/queues/tx-* | wc -l | awk '{print $1}'`
+if [ $QUEUES -gt 1 ]
+then
+    if [ -x $S/$IFACE/phy80211 ]
+    then
+        wifi
+    else
+        mq
+    fi
+else
+    fq_codel
+fi
+}
+
+
+# Start from a clean slate; the delete fails harmlessly when nothing is set
+tc qdisc del dev $IFACE root 2> /dev/null
+fix_speed
+fix_queues
+
+exit 0
+
diff --git a/rpm-verify.sh b/rpm-verify.sh
index 2d74c21..8f52ec5 100755
--- a/rpm-verify.sh
+++ b/rpm-verify.sh
@@ -30,6 +30,16 @@ echo "Generating reports ..."
 /bin/egrep '^.{9} c /' ${TMPDIR}/RPM-VA2_${DS}.txt > ${TMPDIR}/REVIEW-CONFIGS_${DS}.txt
 /bin/find /etc -name '*.rpm?*' > ${TMPDIR}/REVIEW-OBSOLETE-CONFIGS_${DS}.txt
 
+if [ \! -f /var/lib/yum/groups/installed ]; then
+    # "group mark convert" converts the automatic data you get without using groups
+    # as objects into groups as objects data. This makes it much easier to convert
+    # to groups as objects without having to reinstall.
+    if [ 0$(rpm -qf /etc/redhat-release --qf '%{version}\n') -gt 18 ]; then
+        echo "F19 hack to help from upgrades"
+        yum group mark convert
+    fi
+fi
+
 echo "Requesting extra reporting tools to be installed ..."
 # yum -q install fpaste yum-utils rpmdevtools policycoreutils-python
 /usr/bin/yum -q install \
diff --git a/simple_qos.sh b/simple_qos.sh
new file mode 100644
index 0000000..0739181
--- /dev/null
+++ b/simple_qos.sh
@@ -0,0 +1,255 @@
+#!/bin/sh
+# Cero3 Shaper
+# A 3 bin tc_codel and ipv6 enabled shaping script for
+# ethernet gateways, with an eye towards working well
+# with wireless with uplinks in the 2Mbit to 25Mbit
+# range. It ain't done yet, and is cerowrt specific
+# in that it depends on clearly identifying the
+# internal interfaces via a pattern match.
+
+# Copyright (C) 2012 Michael D Taht
+# GPLv2
+
+# Compared to the complexity that debloat had become
+# this cleanly shows a means of going from diffserv marking
+# to prioritization using the current tools (ip(6)tables
+# and tc). I note that the complexity of debloat exists for
+# a reason, and it is expected that script is run first
+# to setup various other parameters such as BQL and ethtool.
+# (And that the debloat script has setup the other interfaces)
+
+# You need to jiggle these parameters.
Note limits are tuned towards a <10Mbit uplink, <60Mbit down
+
+UPLINK=2000
+DOWNLINK=20000
+DEV=ifb0
+QDISC=fq_codel # fq_codel is winning universally
+IFACE=ge00
+DEPTH=42
+TC=/usr/sbin/tc
+PERTURB="perturb 0" # Permutation is costly, disable
+FLOWS=16000 # (an earlier FLOWS=8000 was immediately overwritten by this)
+BQL_MAX=3000 # it is important to factor this into the RED calc
+
+CEIL=$UPLINK
+MTU=1500
+ADSLL=""
+# PPOE=yes
+
+#config interface ge00
+# option classgroup "Default"
+# option enabled 0
+# option upload 128
+# option download 1024
+
+# uci get aqm.enable
+#
+# You shouldn't need to touch anything here
+
+# PPOE is a yes/no style switch, not a file name: test it for being
+# non-empty (-n) rather than for naming a non-empty file (-s).
+if [ -n "$PPOE" ]
+then
+    OVERHEAD=40
+    LINKLAYER=adsl
+    ADSLL="linklayer ${LINKLAYER} overhead ${OVERHEAD}"
+fi
+
+# Run the same rule through both iptables and ip6tables.
+# "$@" hands every argument over exactly as the caller wrote it.
+ipt() {
+iptables "$@"
+ip6tables "$@"
+}
+
+# Load the kernel modules for the qdisc, classifier and action types
+# named below.
+do_modules() {
+
+insmod sch_$QDISC
+insmod sch_ingress
+insmod act_mirred
+insmod cls_fw
+insmod sch_htb
+
+}
+
+# Add a pair of u32 filters (IPv4 tos, IPv6 priority) that steer one
+# TOS value into a class.
+#   $1 parent handle   $2 TOS byte to match under mask 0xfc   $3 classid
+# Uses and advances the $interface and $prio globals set up by diffserv.
+fc() {
+PARENT=$1
+TOS=$2
+CLASSID=$3
+$TC filter add dev $interface protocol ip parent $PARENT prio $prio u32 match ip tos $TOS 0xfc classid $CLASSID
+prio=$(($prio + 1))
+$TC filter add dev $interface protocol ipv6 parent $PARENT prio $prio u32 match ip6 priority $TOS 0xfc classid $CLASSID
+prio=$(($prio + 1))
+}
+
+# This could be a complete diffserv implementation
+
+# Install the TOS-to-class filters on the interface given as $1.
+diffserv() {
+
+interface=$1
+
+prio=1
+
+# Catchall
+
+$TC filter add dev $interface parent 1:0 protocol all prio 999 u32 \
+    match ip protocol 0 0x00 flowid 1:12
+
+# Find the most common matches fast
+# (the value is the whole TOS byte, i.e. DSCP shifted left by two bits)
+
+fc 1:0 0x00 1:12 # BE
+fc 1:0 0x20 1:13 # CS1
+fc 1:0 0x10 1:11 # IMM
+fc 1:0 0xb8 1:11 # EF
+fc 1:0 0xc0 1:11 # CS6
+fc 1:0 0xe0 1:11 # CS7
+fc 1:0 0x90 1:11 # AF42 (mosh)
+# NOTE(review): ipt_setup prioritises CS3, whose TOS byte is 0x60; it has
+# no entry here. Confirm whether that asymmetry is wanted.
+
+# Arp traffic
+$TC filter add dev $interface parent 1:0 protocol arp prio $prio handle 1 fw classid 1:11
+prio=$(($prio + 1))
+}
+
+
+# Build the mangle-table rules that turn DSCP/TOS markings into the fw
+# marks (0x1, 0x2, 0x3) matched by the fw filters in egress.
+ipt_setup() {
+
+ipt -t mangle -F
+ipt -t mangle -N QOS_MARK
+
+ipt -t mangle -A QOS_MARK -j MARK --set-mark 0x2
+# You can go further with classification but...
+ipt -t mangle -A QOS_MARK -m dscp --dscp-class CS1 -j MARK --set-mark 0x3
+ipt -t mangle -A QOS_MARK -m dscp --dscp-class CS3 -j MARK --set-mark 0x1
+ipt -t mangle -A QOS_MARK -m dscp --dscp-class CS6 -j MARK --set-mark 0x1
+ipt -t mangle -A QOS_MARK -m dscp --dscp-class EF -j MARK --set-mark 0x1
+ipt -t mangle -A QOS_MARK -m dscp --dscp-class AF42 -j MARK --set-mark 0x1
+ipt -t mangle -A QOS_MARK -m tos --tos Minimize-Delay -j MARK --set-mark 0x1
+
+# and it might be a good idea to do it for udp tunnels too
+
+# Turn it on. Preserve classification if already performed
+
+ipt -t mangle -A POSTROUTING -o $DEV -m mark --mark 0x00 -g QOS_MARK
+ipt -t mangle -A POSTROUTING -o $IFACE -m mark --mark 0x00 -g QOS_MARK
+
+# The Syn optimization was nice but fq_codel does it for us
+# ipt -t mangle -A PREROUTING -i s+ -p tcp -m tcp --tcp-flags SYN,RST,ACK SYN -j MARK --set-mark 0x01
+# Not sure if this will work. Encapsulation is a problem period
+ipt -t mangle -A PREROUTING -i vtun+ -p tcp -j MARK --set-mark 0x2 # tcp tunnels need ordering
+
+# Emanating from router, do a little more optimization
+# but don't bother with it too much.
+
+ipt -t mangle -A OUTPUT -p udp -m multiport --ports 123,53 -j DSCP --set-dscp-class AF42
+
+#Not clear if the second line is needed
+#ipt -t mangle -A OUTPUT -o $IFACE -g QOS_MARK
+
+}
+
+
+# TC rules
+
+# Shape traffic leaving $IFACE to $UPLINK kbit: an HTB tree with classes
+# 1:11, 1:12 and 1:13, each with its own $QDISC, selected by fw mark.
+egress() {
+
+CEIL=${UPLINK}
+PRIO_RATE=`expr $CEIL / 3` # Ceiling for priority
+BE_RATE=`expr $CEIL / 6` # Min for best effort
+BK_RATE=`expr $CEIL / 9` # Min for background
+BE_CEIL=`expr $CEIL - 64` # A little slop at the top
+
+R2Q=""
+
+# The htb keyword is r2q; it is requested for uplinks below 1000 kbit
+if [ "$CEIL" -lt 1000 ]
+then
+    R2Q="r2q 1"
+fi
+
+# Nothing to delete on the first run, so keep that error quiet
+$TC qdisc del dev $IFACE root 2> /dev/null
+$TC qdisc add dev $IFACE root handle 1: htb ${R2Q} default 12
+$TC class add dev $IFACE parent 1: classid 1:1 htb rate ${CEIL}kbit ceil ${CEIL}kbit $ADSLL
+$TC class add dev $IFACE parent 1:1 classid 1:10 htb rate ${CEIL}kbit ceil ${CEIL}kbit prio 0 $ADSLL
+$TC class add dev $IFACE parent 1:1 classid 1:11 htb rate 128kbit ceil ${PRIO_RATE}kbit prio 1 $ADSLL
+$TC class add dev $IFACE parent 1:1 classid 1:12 htb rate ${BE_RATE}kbit ceil ${BE_CEIL}kbit prio 2 $ADSLL
+$TC class add dev $IFACE parent 1:1 classid 1:13 htb rate ${BK_RATE}kbit ceil ${BE_CEIL}kbit prio 3 $ADSLL
+
+$TC qdisc add dev $IFACE parent 1:11 handle 110: $QDISC limit 600 quantum 300 noecn
+$TC qdisc add dev $IFACE parent 1:12 handle 120: $QDISC limit 600 quantum 300 noecn
+$TC qdisc add dev $IFACE parent 1:13 handle 130: $QDISC limit 600 noecn
+
+$TC filter add dev $IFACE parent 1:0 protocol ip prio 1 handle 1 fw classid 1:11
+$TC filter add dev $IFACE parent 1:0 protocol ip prio 2 handle 2 fw classid 1:12
+$TC filter add dev $IFACE parent 1:0 protocol ip prio 3 handle 3 fw classid 1:13
+
+# ipv6 support. Note that the handle indicates the fw mark bucket that is looked for
+
+$TC filter add dev $IFACE parent 1:0 protocol ipv6 prio 4 handle 1 fw classid 1:11
+$TC filter add dev $IFACE parent 1:0 protocol ipv6 prio 5 handle 2 fw classid 1:12
+$TC filter add dev $IFACE parent 1:0 protocol ipv6 prio 6 handle 3 fw classid 1:13
+
+# Arp traffic
+
+$TC filter add dev $IFACE parent 1:0 protocol arp prio 7 handle 1 fw classid 1:11
+
+}
+
+# Shape traffic arriving on $IFACE to $DOWNLINK kbit by redirecting it to
+# $DEV and running the same style of HTB tree there, classified by
+# diffserv.
+ingress() {
+
+CEIL=$DOWNLINK
+PRIO_RATE=`expr $CEIL / 3` # Ceiling for priority
+BE_RATE=`expr $CEIL / 3` # Min for best effort
+BK_RATE=`expr $CEIL / 6` # Min for background
+BE_CEIL=`expr $CEIL - 64` # A little slop at the top
+
+R2Q=""
+
+$TC qdisc del dev $IFACE handle ffff: ingress 2> /dev/null
+$TC qdisc add dev $IFACE handle ffff: ingress
+
+$TC qdisc del dev $DEV root 2> /dev/null
+$TC qdisc add dev $DEV root handle 1: htb ${R2Q} default 12
+# Rates and ceilings share one unit (kbit); a kibit ceiling would sit above
+# the kbit rate of the root class and defeat the slop left in BE_CEIL.
+$TC class add dev $DEV parent 1: classid 1:1 htb rate ${CEIL}kbit ceil ${CEIL}kbit $ADSLL
+$TC class add dev $DEV parent 1:1 classid 1:10 htb rate ${CEIL}kbit ceil ${CEIL}kbit prio 0 $ADSLL
+$TC class add dev $DEV parent 1:1 classid 1:11 htb rate 32kbit ceil ${PRIO_RATE}kbit prio 1 $ADSLL
+$TC class add dev $DEV parent 1:1 classid 1:12 htb rate ${BE_RATE}kbit ceil ${BE_CEIL}kbit prio 2 $ADSLL
+$TC class add dev $DEV parent 1:1 classid 1:13 htb rate ${BK_RATE}kbit ceil ${BE_CEIL}kbit prio 3 $ADSLL
+
+# I'd prefer to use a pre-nat filter but that causes permutation...
+
+$TC qdisc add dev $DEV parent 1:11 handle 110: $QDISC limit 1000 ecn
+$TC qdisc add dev $DEV parent 1:12 handle 120: $QDISC limit 1000 ecn
+$TC qdisc add dev $DEV parent 1:13 handle 130: $QDISC limit 1000 ecn
+
+# Follow the DEV setting rather than a hard-coded ifb0
+diffserv $DEV
+
+ifconfig $DEV up
+
+# redirect all IP packets arriving in $IFACE to $DEV
+
+$TC filter add dev $IFACE parent ffff: protocol all prio 10 u32 \
+    match u32 0 0 flowid 1:1 action mirred egress redirect dev $DEV
+
+}
+
+do_modules
+ipt_setup
+egress
+ingress
+
+# References:
+# This alternate shaper attempts to go for 1/u performance in a clever way
+# http://git.coverfire.com/?p=linux-qos-scripts.git;a=blob;f=src-3tos.sh;hb=HEAD
+
+# Comments
+# This does the right thing with ipv6 traffic.
+# It also does not rehash with sfq skewing streams
+# It also tries to leverage diffserv to some sane extent. In particular,
+# the 'priority' queue is limited to 33% of the total, so EF, and IMM traffic
+# cannot starve other types. The rfc suggested 30%. 30% is probably
+# a lot in today's world.
+
+# Flaws
+# Many!
+
+# Why 42?
+# Lucky number.
+# the sum of the number of packets here + htb + the ar71xx device driver
+# ~= 50 the core number used by theorists everywhere.
+
|
