#!/usr/bin/lua -- Use of various qdiscs for ethernet and wireless -- This script expects to be run in /etc/network/if-pre-up.d To run it -- manually, do a IFACE=yournetworkcard ./debloat -- For NATTED interfaces, use a NAT=y for a better filter -- To select QFQ use QMODEL=qfq. -- Some options currently require a new version of tc. Build a version -- of iproute2 and stick it somewhere and change the TC variable to -- suit. -- These are the possible parameters that can be changed via -- environment variables. params = { "MDISC", "BIGDISC", "NORMDISC", "BINS", "MAX_HWQ_BYTES", "QMODEL", "FORCE_SPEED", "FORCE_RING", "QDEBUG", "VERBOSE", "UPLINK", "DOWNLINK", "IFACE", "SPEED", "DEPTH", "DEBLOATLOG", "NAT", "NAT64", "CLAMP_MSS", "SYN_FLOOD", "MTU", "INGRESS", "TC", "TCARG", "ETHTOOL", "INSMOD", "LSMOD", "IPTABLES", "IP6TABLES", "PINGOPT", "FLOWS", "TSO", "BURST", "IPV6", "OVERHEAD", "LINKLAYER", "MPU", "PPPOE", "ADSL", "NOPRIOHOSTSRC","NOPRIOHOSTDST","NOPRIOPORTSRC", "NOPRIOPORTDST", "EST_MIN", "EST_MAX", "HEADDROP", "ECNMASK", "qlen_vo", "qlen_vi", "qlen_be", "qlen_bk", "CODEL_LL_QUANTUM" } -- Useful defaults env = { ["TC"] = "/sbin/tc", ["TCARG"] = "-b", ["INSMOD"] = "/sbin/modprobe", ["ETHTOOL"] = "/sbin/ethtool", ["LSMOD"] = "/sbin/lsmod", ["IPTABLES"] = "/sbin/iptables", ["IP6TABLES"] = "/sbin/ip6tables", ["MDISC"] = "codel", ["BIGDISC"] = "codel", ["NORMDISC"] = "codel", ["BINS"] = 2048, ["DEPTH"] = 24, ["QMODEL"] = "fq_codel_ll", ["MAX_HWQ_BYTES"] = 3000, ["ECNMASK"] = 0xfc, ["EST_MIN"] = 1, ["EST_MAX"] = 4, ["IPV6"] = true, ["LINKLAYER"] = "ethernet", ["DEBLOATLOG"] = "/dev/null", ["qlen_vo"] = 2, ["qlen_vi"] = 4, ["qlen_be"] = 12, ["qlen_bk"] = 12, ["MTU"] = 1500, ["CODEL_LL_QUANTUM"] = 1000 } wireless_debug = "/sys/kernel/debug/ieee80211/" qlens = { "qlen_vo", "qlen_vi", "qlen_bk", "qlen_be" } -- various shortcuts for commonly used functions local sf=string.format local exec=os.execute local popen=io.popen local open=io.open -- FIXME, override above to redirect stderr VO=0x10; VI=0x20; BE=0x30; BK=0x40 local WQUEUES = { BE, VO, VI, BK } local function usage(s) o=[[ The debloat tool aims for minimal latency (particularly under load) on the network, for hosts, servers, wireless devices, and routers. There are various forms of traffic shapers and tools in here because this is an unsolved problem! Most of the known techniques are in here, however, and the results can be quite remarkable. At tested rates of 100Mbit and 4Mbit, we see interstream latencies drop by over two orders of magnitude. This script expects to be run in /etc/network/if-pre-up.d To run it manually, do a: IFACE=yournetworkcard ./this_script For NATTED interfaces, use a NAT=y for a better filter. There are many environment variables and at some point will be a conf file. The one of greatest importance is "QMODEL" to which I keep adding various models for various circumstances. See the end of this file for more details. This script can be run on both debian and openwrt. Usage of QFQ and the advanced SFQ and SFQRED options currently requires a patched version of iproute2 and a Linux 3.3 kernel. Build a version and stick it somewhere and change TC to suit. Also, if you are interested in seeing the rules being generated, rather than reconfiguring your system export QDEBUG=1 is helpful. * Some general overall design notes: This started out life as a shell script to exercise qfq, Now it does a lot more than that and is getting crufty. FQ_CODEL is now the default. SFQ has been improved significantly in Linux 3.3 (eliminating a head of line problem), and in this case no new TC utility is required. Also a bug in red was fixed, and no new tc utility is required there either. So if you were using either or both of these qdiscs, you should automagically see your life improve... QFQ is too buggy prior to 3.3 to use. More advanced SFQ options and REDSFQ and QFQ all require a patched version of TC. Also, most builds for the linux kernel do not enable QFQ by default. QFQ and SFQ are behaving competitively now in most circumstances, however. * Byte Queue Limits is supposed to have a rate limiter that works. It is not very effective at less than 100Mbit. I get ~32k peak there and with GSO on, at 100Mbit, I have seen latency spikes of up to 70ms. (Not recently tested, however) A per queue limit of 2 large packets appears to be the best compromise at 100Mbit and below. So typically I hammer down BQL to 3k at < 100Mbit, and turn GSO/TSO off, and as a result see ping against load latencies in the 1 to 2ms range, which is about what you would expect. I have tried 1500 bytes, which limited the top end performance to about 84Mbit. At 10Mbit, 1514 works on most OSes. For comparison, you will see PFIFO_FAST doing 130+ms, pre BQL, no SFQ at 100Mbit. * A BQL enabled ethernet device driver is helpful But there is currently no good way to detect if you have one at run time. 10 of the most major drivers have been convered to BQL, more remain. * Wireless still has problems This stuff helps on wireless stations, desktops, etc, and on P2P wireless links. ** caveat 1 There remains so much device buffering and retries below the qdisc layer as to defeat both FQ and and AQM to a large extent. Also packets tend to be held 'forever' (ping rtts of over 10 seconds have been observed) A time in queue optimization at the qdisc layer for the latter problem has been proposed, but not implemented, and much further work on the wireless driver portion of the stack remains to be designed and agreed upon. BQL has not (and cannot, to a large extent) be implemented on the wireless portion of the stack as it currently stands. ** caveat 2 There is not a particularly good way to apply much of this to the wireless interface on an AP as yet. FQ messes with wireless-n packet aggregation. That said, under home use with a limited number of user, SFQ+RED does seem to work pretty good. * Some QFQ related notes: ** QFQ can handle up to 32k bins Whether you are willing to wait for them to be generated is a better question. How this interacts with bittorrent etc is also a good question. 512 is 4x as many bins as the old SFQ implementation. I have tested as many as 2048 bins, problems ensue with kernel memory allocation at various levels higher than that. The 'bin creation' problem is why this code uses tc in batch mode. It used to take minutes to create the bins. Now, a split second. (there was also a patch that helped this in 3.3) ** Various sub-qdiscs in QFQ I have tried pfifo_drop_head, SFB, and RED here. All had bugs until 3.3. And linux RED & SFB, being byte oriented, was often not good. pfifo_drop_head generates interesting results. The very new combination of REDSFQ which compensates for both bytes and packets was very interesting, as it combines everything we have learned in the past year into one single qdisc which can be brought up as a shaper in three lines of code. FQ_Codel is better. In other news: I have not tried the new 'adaptive red' implementation as a stand alone qdisc, nor revisited SFB in light about what I now know about GSO behavior. I would like to try QFQ and SFQ in combination to attempt to defeat the bittorrent problem at some point. ** Calculating a sane per-queue packet limit is an issue, too. iw10 requires a minimum of 10, and more likely 12 (fin, close) so... In places we arbitrarily double that, and wave hands. I almost never see packet drop with 24, which is far, far better than 1000. Might need to be larger on gigE+. Might be wrong headed entirely. ** Multicast We try to maltreat multicast especially in the QFQ implementation. When handed to a load balancing filter based on IPs, multicast addresses are all over the map. It would be trivial to do a DOS with this multi-bin setup. So we toss all multicast into a single bin whenever possible. This is suboptimal, also. It would be good to get multicast into the VO queue on wireless but bugs exist. Multicast concerns me also when using SFQ on general purpose ethernet. ** Default Bins You can do tricks with the DEFAULTB concept, creating a filter to optimize for ping, for example, which makes tests reproducable. (this is done for wshaper and QFQ) Another example would be to set aside bins for voip or dns, etc. Still, it is saner to just let the filter do all the work of finding a decent bin. The only sane purpose for DEFAULTB at the moment is to have a safe place to put QFQ packets until all the filters and bins are setup. * Other important debloat options There are many environment variables that can be set. Most notably - the QMODEL var has various forms of AQM/FQ/shaper available. Available QMODELS are qfq, sfq, sfqred, efq and various combinations thereof, as well as a hard coded 4mbit htb_sfq_red model, and emulations of the original wondershaper and a mildly improved one. See the tail end of the code for what is available. Most work on either ethernet or wireless and try to deal with the problems of each. Usage of QFQ and the advanced SFQ options currently requires a new version of iproute2 and a Linux 3.3 kernel and some patches. A byte Queue limit enabled device driver is required for ethernet, except for when the HTB rate limiter is used. In all cases a Linux 3.3 or later kernel is required for best results. ]] print(o) -- print("Available Shaper Models Are:") -- for i,v in pairs (ECALLBACKS) do -- print(i) -- end print(s) os.exit(-1) end pingopt = function(...) return nil end warn = function(...) return nil end function file_exists(name) local f=open(name,"r") if f ~= nil then f:close(); return true else return false end end local function is_openwrt() if file_exists("/etc/uci-defaults") then return true else return false end end -- Override various defaults with env vars if is_openwrt() then env.INSMOD = "/sbin/insmod" env.ETHTOOL = "/usr/sbin/ethtool" env.TC = "/usr/sbin/tc" end -- pull params from conf file local function getconf() end -- getenv pulls in everything as strings, -- so do the conversion here local function fromenv(v) local s = os.getenv(v) if s == nil then return nil end -- FIXME allow . local m = string.match(s,"^%d+") if m ~= nil then return tonumber(s) end if s == "true" then return true end if s == "false" then return false end return s end -- From the possible parameters in t, override o local function getenvs(t,o) for i,v in pairs(t) do local s = fromenv(v) if s ~= nil then o[v] = s end end return o end env = getenvs(params, env) if (env["IFACE"] == nil) then usage("Error: The IFACE environment variable must be set") end IFACE=env.IFACE QMODEL=env.QMODEL BINS=env.BINS MULTICAST=BINS+1 DEFAULTB=BINS+2 PREREQS = { "sch_qfq", "sch_codel", "sch_fq_codel", "cls_u32", "cls_flow", "sch_sfq", "sch_red", "sch_htb", "cls_fw", "sch_efq_codel", "sch_ns2_codel", "sch_nfq_codel" } -- we can get more complex later PREREQS2 = { ["qfq"] = { "sch_qfq", "cls_u32", "cls_flow" }, ["sfq"] = { "sch_sfq", "cls_u32", "cls_flow" }, ["red"] = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" }, ["ared"] = { "sch_qfq", "sch_red", "cls_u32", "cls_flow" } } -- FIXME: Merge multiple tables into one table on values in first -- return table function merge(...) local t = { } for i,v in pairs(...) do for i,v in pairs(v) do t[v] = true end end return t end -- slurp a file function slurpf(file) local f = open(file,"r") if f ~= nil then local s = f:read("*all") f:close() return s end return nil end -- spew output into a command function spewc(command,s) local f = popen(command,"w") if f ~= nil then local v = f:write(s) f:close() return v end return nil end -- spew output into a file function spewf(file,s) local f = open(file,"w") if f ~= nil then local v = f:write(s) f:close() return v end return nil end -- slurp a file into a table function tslurpf(file) local s = slurpf(file) if s ~= nil then return s:split("\n") end return nil end -- return the output of a command as a big string function slurpc(command) local f = popen(command,"r") if f ~= nil then local s = f:read("*all") f:close() return s end return nil end -- return the output of a command as a table function tslurpc(command) local s = slurpc(command) if s ~= nil then return s:split("\n") end return nil end -- Some utility functions -- can't depend on 'wlan or eth' patterns, so try sysfs -- FIXME: This needs to be made smarter and detect other forms -- of tunnel. function interface_type(iface) if iface == 'lo' then return('localhost') end if iface:sub(1,3) == 'ifb' then return('ifb') end if iface:find('%.') ~= nil then return('vlan') end if iface:sub(1,3) == 'gre' then return('tunnel') end if iface:sub(1,2) == 'br' then return('bridge') end if file_exists(sf("/sys/class/net/%s/phy80211/name",iface)) then return ('wireless') end return ('ethernet') end local function ethtool_popen(...) return popen(sf("%s %s 2>> %s",env.ETHTOOL,sf(...),env.DEBLOATLOG),"r") end local function ethtool(...) exec(sf("%s %s 2>> %s",env.ETHTOOL,sf(...),env.DEBLOATLOG)) end -- lua doesn't have a split function. Grr. function string:split(sep) local sep, fields = sep or ":", {} local pattern = string.format("([^%s]+)", sep) self:gsub(pattern, function(c) fields[#fields+1] = c end) return fields end -- return the modules already installed local function lsmod() local t = { } local k = { } for i,v in pairs(tslurpc(env.LSMOD)) do k = v:split(" ") if k[1] ~= "Module" then table.insert(t,k[1]) end end return t end -- take a table of modules to insert local function insmod(modules) for i,v in pairs(modules) do exec(sf("%s %s",env.INSMOD,v)) end return true end -- there must be a more lua-specific way for this function exists(t,s) for i,v in pairs(t) do if v == s then return true end end return false end -- Return pre-reqs not installed function prereq_check(prereqs) s = lsmod() local t = { } for i,v in pairs(prereqs) do if exists(s,v) == false then table.insert(t,v) end end return t end -- install pre-reqs if not installed function kernel_prereqs(prereqs) return insmod(prereq_check(prereqs)) end function round(v) return math.ceil(v-.5) end function kleinrock(bandwidth, delay, flows) return round(math.sqrt(flows)*bandwidth*delay) end function bound(v,min,max) if max < min then local t = max max = min min = t end if v > min and v < max then return v end if v > max then return max end if v < min then return min end return v end function rbound(v,min,max) return round(bound(v,min,max)) end local function lowrate(rate) if rate < 15000 then return true else return false end end -- Don't like these side effects local function htb_est() s = " " if env.PPPOE then env.OVERHEAD=40 end if env.ADSL then env.LINKLAYER="adsl" end if lowrate(env.UPLINK) then env.R2Q=1 end end -- ADSL overhead calculation for htb local function overhead(rate) local s = " " if env.OVERHEAD then s = s .. "OVERHEAD=" .. OVERHEAD .. " " end if env.LINKLAYER then s = s .. "LINKLAYER=" .. env.LINKLAYER .. " " end if lowrate(rate) then s = s .. "R2Q=1 " end return(s) end -- Attempt at finding useful values for sfqred -- FIXME: re-read ared paper -- Yes, you want a depth=10 (or 15) limit, to cope with the increase -- of CWND done by Google. -- Best thing would be to use SFQRED and headdrop, so that there is no -- assumption on packet lengths. -- tc qdisc add ... sfq headdrop limit 200 depth 15 redflowlimit 50000 -- min 5000 max 10000 probability 0.15 ecn -- Another often hidden assumption is that RTTs are 100ms target = 50 -- The problem with byte oriented red is that it will never -- kick in or mark acks. local function redflowlimit(up,down) local OVERHEAD=0 if env.OVERHEAD then OVERHEAD=env.OVERHEAD end bytes_per_ms = up / 12.5 pkt_per_ms_worst = bytes_per_ms/(env.MTU + OVERHEAD) pkt_per_ms_best = bytes_per_ms/(64 + OVERHEAD) local limit = round(pkt_per_ms_best * target) local perflow = round(pkt_per_ms_best) local rlimit = rbound(target * bytes_per_ms,1500,15000) local depth = 10 return limit,rlimit,depth,pkt_per_ms_worst end --print(redflowlimit(4000,20000)) --print(redflowlimit(2000,20000)) --print(redflowlimit(400,2000)) -- Choke is packet oriented at the UI, but bytes underlie it... local function chokelimit(up,down) local OVERHEAD=0 if env.OVERHEAD then OVERHEAD=env.OVERHEAD end bytes_per_ms = up / 12.5 pkt_per_ms_worst = bytes_per_ms/(env.MTU + OVERHEAD) pkt_per_ms_best = bytes_per_ms/(64 + OVERHEAD) local limit = round(pkt_per_ms_best * target) local perflow = round(pkt_per_ms_best) local rlimit = rbound(target * bytes_per_ms,1500,15000) local depth = 10 return limit,rlimit,depth,pkt_per_ms_worst end -- Some htb info that is probably hopelessly out of date -- Counting packets with quantum can be strange. If we have a low rate -- class (rate = 5kbit), default quantum = 5000 / 10 = 500 bytes. But -- most packets are more then 500 bytes. Htb version 1 and 2 uses DRR, -- so a packet larger then 1000 bytes will be sent and it will -- remember how much it sent and wait until the packet is paid back -- before another packet is send. So if you send 1000 byte, next time -- the class is polled, you will not be allowed to send. -- Htb3 uses the WRR scheduler. When a packet with size > quantum is -- sent, it will be sent and an error that the quantum is too small -- will be logged. But there is no pay back. The WRR scheduler is -- faster then the DRR scheduler. So make sure quantum is bigger then -- the default packet size. For 15 kbyte/s and default r2q, quantum is -- 1500 and this is exactly the maximum packet size. If you want to -- tune htb for rates smaller then 15 kbyte/s, you can manually set -- the r2q and/or quantum. -- FIXME: -- We want to capture the characteristics of -- the interface in a table. -- So we need to parse the output of ethtool better -- ["tx-ring"] = X -- ["speed"] = X -- etc -- return a hash of the properties of the interface -- terrific, this is hard to parse. -- ethtool -g eth0 -- Ring parameters for eth0: -- Pre-set maximums: -- RX: 4096 -- RX Mini: 0 -- RX Jumbo: 0 -- TX: 4096 -- Current hardware settings: -- RX: 256 -- RX Mini: 0 -- RX Jumbo: 0 -- TX: 64 -- ethtool -g wlan0 -- Ring parameters for wlan0: -- Pre-set maximums: -- RX: 0 -- RX Mini: 0 -- RX Jumbo: 0 -- TX: 0 -- Current hardware settings: -- RX: 0 -- RX Mini: 0 -- RX Jumbo: 0 -- TX: 0 -- -k is easier -- ethtool -k eth0 -- Offload parameters for eth0: -- rx-checksumming: on -- tx-checksumming: on -- scatter-gather: on -- tcp-segmentation-offload: off -- udp-fragmentation-offload: off -- generic-segmentation-offload: off -- generic-receive-offload: on -- large-receive-offload: off -- rx-vlan-offload: on -- tx-vlan-offload: on -- ntuple-filters: off -- receive-hashing: off function string:trim () return (string.gsub(self, "^%s*(.-)%s*$", "%1")) end function offloads(iface) local t = { } local s = tslurpc(sf("%s -k %s",env.ETHTOOL,iface)) if s ~= nil then for i,v in ipairs(s) do if v == "no offload info available" then return nil end end for i,v in ipairs(s) do local h = v:split(":") local j = h[1]:split(" ") if # j > 1 then if j[1] ~= "Offload" then if # h > 1 then t[h[1]:trim()] = h[2]:trim() end -- FIXME: should probably change 'off' and 'on' to false and true end end end end return t end -- test the offloads problem -- we have a wide range of possible inputs to test against as yet function test_offloads(iface) local o = offloads(iface) if o ~= nil then for i,v in pairs(o) do print(sf("%s %s",i,v)) end end end -- test_offloads("eth1") -- FIXME - could use a little more thought on creating the -- hash function ring_params(iface) local t = { } local state = 0 for i,v in ipairs(tslurpc(sf("%s -g %s",env.ETHTOOL,iface))) do local h = v:split(":") local j = h[1]:split(" ") if j[1] == "Ring" or j[1] == "Pre-set" then -- do nothing elseif j[1] == "Current" then state = 1 elseif state == 0 then t['max_' .. h[1]:trim()] = h[2]:trim() elseif state == 1 then t['cur_' .. h[1]:trim()] = h[2]:trim() end end return t end function test_ring_params(iface) local o = ring_params(iface) for i,v in pairs(o) do print(sf("%s %s",i,v)) end end -- test_offloads(IFACE) -- test_ring_params(IFACE) function iface_get(iface) end -- return number of hardware queues found local function bql_setup(iface) local c = 0 while spewf(sf("/sys/class/net/%s/queues/tx-%d/byte_queue_limits/limit_max",iface,c), env.MAX_HWQ_BYTES) ~= nil do c = c + 1 end return c end -- Maybe better done with ethtool local function speed_set(iface,speed) return spewf(sf("/sys/class/net/%s/speed",iface),speed) end local function speed_get(iface) return slurpf(sf("/sys/class/net/%s/speed",iface)) end local function rate_quantum(rate) if rate < 10000 then return(1500) end return(1500) end -- Doing this as a lookup table hurt lua -- FIXME: Not clear how to reset to advertising all -- Not clear how to reset this parameter from -- userspace to autonegotiate -- What to do with non-sensical values that you -- get before an interface is live? -- Maybe use ethtool speed option? local function advertise_speed(s) local x = "0x000" if s < 10001 then x = "0x1000" end if s < 1001 then x = "0x020" end if s < 101 then x = "0x008" end if s < 11 then x = "0x002" end if s == 0 then x = "0x000" end return x end -- TSO does terrible things to the scheduler -- GSO does as well -- UFO is not a feature of most devices -- In the long run I think we want to disable -- TSO and GSO entirely below 100Mbit. I'd -- argue for same for gigE, too, for desktops local function ethernet_setup(iface) local o = offloads(iface) local tx = ring_params(iface) -- for testing, limit ethernet to SPEED if env.FORCE_SPEED then ethtool(sf("-s %s advertise %s",iface, advertise_speed(env.FORCE_SPEED))) end if env.FORCE_RING then if env.FORCE_RING < tx['cur_TX'] then ethtool(sf("-G %s tx %d",iface,env.FORCE_RING)) end end local queues = bql_setup(iface) if env.TSO == nil then ethtool("-K %s gso off",iface) ethtool("-K %s tso off",iface) ethtool("-K %s ufo off",iface) ethtool("-K %s gro off",iface) end return queues end -- Some TC helpers -- rates are specified as kilobits (xkbits) -- does this need to be integer? -- FIXME the effect of rounding really isn't what we want local function r2s(rate) if round(rate) < 1000 then return round(rate) .. "kbit" end local r = rate/1000 if round(r) < 1000 then return round(r) .. "mbit" end r = r/1000 return round(r) .. "gbit" end -- print(r2s(10)) -- print(r2s(1001)) -- print(r2s(1050)) -- yea, rounding bad -- print(r2s(999999.4)) -- print(r2s(10000000.6)) -- print(r2s(1000000)) -- TC tends to be repetitive and hard to read -- So these function shorten things considerably by doing -- the "{class,qdisc,filter} add dev %s" for us -- It also means lua keeps less unique strings around. -- Constructing something that was ** reversible ** -- and cleaner to express would be better that this local castring=sf("class add dev %s ", env.IFACE) local fastring=sf("filter add dev %s ",env.IFACE) local qastring=sf("qdisc add dev %s ", env.IFACE) -- Similarly, the 'parent' string is often used local capstring=sf("class add dev %s parent ", env.IFACE) local fapstring=sf("filter add dev %s parent ",env.IFACE) local qapstring=sf("qdisc add dev %s parent ", env.IFACE) local function ca(...) return tc:write(castring,sf(...),"\n") end local function cap(...) return tc:write(capstring,sf(...),"\n") end local function fa(...) return tc:write(fastring,sf(...),"\n") end local function fap(...) return tc:write(fapstring,sf(...),"\n") end local function qa(...) return tc:write(qastring,sf(...),"\n") end local function qap(...) return tc:write(qapstring,sf(...),"\n") end -- FIXME sanely calculate htb rate, overhead, etc, etc local function hap(parent,child,rate,str) cap("%x: classid %x:%x %s htb rate %skibit mtu %d mpu 64 quantum %d %s", parent,parent,child,est,rate,env.MTU,quantum,str) end local function opentc() return popen(sf("%s %s",env.TC, env.TCARG),'w') end function ingress() if env.INGRESS then kernel_prereqs({"sch_ingress"}) local tc = opentc() tc:write(sf("qdisc del dev %s ingress\n",IFACE)) tc:close() end end function resettc() local tc=opentc() tc:write(sf("qdisc del dev %s root\n",IFACE)) tc:close() return opentc() end -- QFQ: Create a bin attached to the parent class local function cb(base,bin,disc) cap("%x classid %x:%x qfq",base,base,bin) qap("%x:%x %s",base,bin,disc) end -- FIXME: It would be nice to have a cleaner way to match all multicast local function fa_mcast(parent) fap("%x: protocol ip prio 5 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST) fap("%x: protocol ipv6 prio 6 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST) fap("%x: protocol arp prio 7 u32 match u8 0x01 0x01 at -14 flowid %x:%x",parent,parent,MULTICAST) end local function fa_defb(parent) fap("%x: protocol all prio 999 u32 match ip protocol 0 0x00 flowid %x:%x",parent,parent,DEFAULTB) end -- FIXME: This needs a correct hash for natted sources when NAT=y and ipv6 -- handle 3 repeated bad? local function fa_bins(parent) if env.NAT then fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS) fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,env.BINS) else fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS) end pingopt(sf("%x:",parent),sf("%x:%x",parent,DEFAULTB)) -- At one point I was trying to handle ipv6 separately -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS) end local function faip_bins(parent) if env.NAT then fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys src divisor %d",parent,env.BINS) fap("%x: handle 4 protocol all prio 97 flow hash keys nfct-src divisor %d",parent,env.BINS) else fap("%x: handle 3 protocol all prio 97 flow hash keys src divisor %d",parent,env.BINS) end pingopt(sf("%x:",parent),sf("%x:%x",parent,DEFAULTB)) -- At one point I was trying to handle ipv6 separately -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,env.BINS) end local function q_bins(parent) for i=0,env.BINS do cap("%x: classid %x:%x qfq",parent,parent,i) qap("%x:%x %s",parent,i,env.BIGDISC) end end -- We can do simple per-stream load balancing across multiple hardware -- queues thusly. This assumes your IPv6 isn't natted.... local function mqprio_bins(parent,queues) if env.NAT then fap("%x: handle 3 protocol ipv6 prio 94 flow hash keys proto-dst,rxhash divisor %d",parent,queues) fap("%x: handle 4 protocol all prio 97 flow hash keys proto-dst,nfct-src divisor %d",parent,queues) else fap("%x: handle 3 protocol all prio 97 flow hash keys proto-dst,rxhash divisor %d",parent,queues) end -- At one point I was trying to handle ipv6 separately -- fa("protocol ipv6 parent %x: handle 4 prio 98 flow hash keys proto-dst,rxhash divisor %d",parent,BINS) end -- Eric's Enhanced SFQ -- FIXME: originally hard coded for 200Mbit -- I'm going to argue that depth, flows, speed all need to be -- done via something kleinrock-like. The problem is that -- we don't know the delay without hitting the next hop -- And we can't get the next hop until after the interface is -- up. And even then we can only measure RTT, which is off -- by a factor of three on the two different systems I've looked at -- FIXME: I don't think I should be measuring speed in megabits -- Eric's original code had a mtu of 40000, which I assume is needed -- for TSO/GSO to work. These quantums are way too large for lower -- speeds --[[ FIXME: Add ADSL support My crude hack was adding: tc_stab_string="stab overhead 18 mtu 2048 mpu 53 linklayer atm" to generate.sh; and then modifying the addition of root disc like the following tc qdisc add dev $dev root handle 1: ${tc_stab_string} hfsc default ${class_default}0 Then I only needed to reduce the uplink and downlink speed marginally (5%) and got good and stable ping latencies even in the lieu of massive uploads and opening 100 browser tabs at the same time. Without the stab option I had to reduce nominal speeds to around 65%-70% of the line rate and still got worse ping latencies than with the stab option. --]] local function htb_sfq(speed,flows) qa("root handle 1: est 1sec 8sec htb default 1") cap("1: classid 1:1 est 1sec 8sec htb rate 200Mbit mtu 1500 quantum 80000") qap("1:1 handle 10: est 1sec 8sec sfq limit 2000 depth 10 headdrop flows 1000 divisor 16384") end -- local function htb_sfq_red(speed,flows) -- hard coded, no math, reasonable for ~4Mbit local function htb_sfq_red(queues) qa("root handle 1: est 1sec 8sec htb default 1") cap("1: classid 1:1 est 1sec 8sec htb rate %dkibit mtu 1500 quantum 1514",env.UPLINK) qap("1:1 handle 10: est 1sec 4sec sfq limit 200 headdrop flows 500 divisor 16384 redflowlimit 24000 min 3000 max 18000 depth 16 probability 0.12 ecn harddrop") end -- This simplified model is intended to work best with the out of tree -- sfq that has enqueuing to head. It merely looks for background traffic -- and tosses that into a separate bucket. -- Rate and ceil local function htb_sfq_red2(queues) BKTRAFFIC=rbound(env.UPLINK *.05, 32, env.UPLINK*.90) BKTRAFFIC_MAX=rbound(env.UPLINK*.85,32,env.UPLINK*.90) MAXTRAFFIC=round(env.UPLINK-BKTRAFFIC) qa("root handle 1:0 est 1sec 8sec htb default 1") cap("1:0 classid 10:1 est 1sec 8sec htb prio 1 rate %dkbps ceil %dkbps mtu 1500 quantum 1514", env.UPLINK, env.UPLINK) cap("10:1 classid 11:1 est 1sec 8sec htb prio 2 rate %dkbps ceil %dkbps mtu 1500 quantum 1514", BKTRAFFIC,BKTRAFFIC_MAX) cap("10:1 classid 12:1 est 1sec 8sec htb prio 2 rate %dkbps ceil %dkbps mtu 1500 quantum 1514", BKTRAFFIC,BKTRAFFIC_MAX) qap("10:1 handle 12:0 est 1sec 4sec sfq limit 300 headdrop flows 2000 divisor 16384 redflowlimit 24000 min 3000 max 18000 depth 43 probability 0.12 ecn harddrop") qap("11:1 handle 20:0 est 1sec 4sec sfq limit 300 headdrop flows 2000 divisor 16384 redflowlimit 64000 min 3000 max 18000 depth 43 probability 0.12 ecn harddrop") fap("1:1 prio 10 u32 match ip tos 0x08 0xff flowid 11:1") -- fap("1:0 prio 11 u32 match ipv6 tos 0x08 0xfc flowid 1:2") end local function htb_sfq_red_cero(queues) qa("root handle 1: est 1sec 8sec htb default 1") cap("1: classid 1:1 est 1sec 8sec htb rate 4Mbit mtu 1500 quantum 4500") qap("1:1 handle 10: est 1sec 4sec sfq limit 120 perturb 60000") end local function efq(parent, handle, speed, flows) qap(sf("%s handle %x: est 1sec 8sec sfq limit 2000 depth 12 headdrop flows %d divisor 16384", parent,handle,flows)) end local function efqr(parent, handle, speed, flows) qap("%s handle %x: est 1sec 4sec sfq limit 200 quantum 3028 perturb 600 headdrop flows %d depth 24 divisor 16384 redflowlimit 40000 min 4500 max 18000 probability 0.20 ecn",parent,handle,flows) end function iptables4(...) exec(sf("iptables %s",...)) end function iptables6(...) exec(sf("ip6tables %s",...)) end function iptables(...) iptables4(...) iptables6(...) end function recreate_filter(t) assert(t.chain, "ERROR: chain parameter is missing!") assert(t.table, "ERROR: table parameter is missing!") iptables(sf("-t %s -F %s", t.table, t.chain)) iptables(sf("-t %s -X %s", t.table,t.chain)) iptables(sf("-t %s -N %s", t.table,t.chain)) end -- Builds up a list of iptables commands to flush the qos_* chains, -- remove rules referring to them, then delete them -- Print rules in the mangle table, like iptables-save -- Find rules for the qos_* chains -- Exclude rules in qos_* chains (inter-qos_* refs) -- Replace -N with -X and hold, with -F and print -- Replace -A with -D -- Print held lines at the end (note leading newline) -- Make into proper iptables calls -- Note: awkward in previous call due to hold space usage -- Magic borrowed from openwrt generate.sh function iptables_aqm_clean() tstr="-t mangle -S | grep '^-N qos_\|-j qos_' | grep -v '^-A qos_' | sed -e '/^-N/{s/^-N/-X/;H;s/^-X/-F/}' -e 's/^-A/-D/' -e '${p;g}' | sed -n -e 's/^./iptables -t mangle &/p'" iptables4(tstr) tstr="-t mangle -S | grep '^-N qos_\|-j qos_' | grep -v '^-A qos_' | sed -e '/^-N/{s/^-N/-X/;H;s/^-X/-F/}' -e 's/^-A/-D/' -e '${p;g}' | sed -n -e 's/^./ip6tables -t mangle &/p'" iptables6(tstr) end -- ${iptrules:+${iptrules}${N}iptables -t mangle -A qos_${cg}_ct -j CONNMARK --save-mark --mask 0xff} -- iptables -t mangle -A qos_${cg} -j CONNMARK --restore-mark --mask 0xff -- iptables -t mangle -A qos_${cg} -m mark --mark 0/0xff -j qos_${cg}_ct -- $pktrules -- $up$N${down:+${down}$N} FW_PREREQ = { "ipt_multiport", "ipt_CONNMARK" } DL_PREREQ = { "cls_u32", "em_u32", "act_connmark", "act_mirred", "sch_ingress" } OWRT_PREREQ = { "cls_fw", "sch_hfsc","sch_sfq", "sch_red" } function iptm(...) iptables(sf("-t mangle %s", sf(...))) end function aqm_init(cg) iptm("-N qos_%s",cg) iptm("-N qos_%s_ct",cg) iptm("-A qos_%s_ct -j CONNMARK --save-mark --mask 0xff",cg) iptm("-A qos_%s -j CONNMARK --restore-mark --mask 0xff",cg) iptm("-A qos_%s -m mark --mark 0/0xff -j qos_%s_ct",cg,cg) -- $pktrules -- $up$N${down:+${down}$N} end local function mcast_classify(chain,class) iptables(sf("-t mangle -A %s -m pkttype ! --pkt-type unicast -j CLASSIFY --set-class %s",chain,class)) end local ds = { ["BE"]=0, ["AF11"]=10, ["AF12"]=12, ["AF13"]=14, ["AF21"]=18, ["AF22"]=20, ["AF23"]=22, ["AF31"]=26, ["AF32"]=28,["AF33"]=30, ["AF41"]=34, ["AF42"]=36, ["AF43"]=38, ["EF"]=46, ["CS1"]=8, ["CS2"]=16, ["CS3"]=24, ["CS4"]=32, ["CS5"]=40, ["CS6"]=48, ["CS7"]=56, ["BOFH"]=4, ["ANT"]=42, ["LB"]=63, ["P2P"]=9 } -- No matter what I try I get this wrong. You would think 1:1,2,3,4 -- was the right thing. Nope. -- So 1:1 doesn't work. 1:10 doesn't work. Trying 1:101 etc because -- that's a magic value and.... -- And IPv6 multicast is never matched. local function mac80211e() local t = "-t mangle -A W80211e -m dscp --dscp %d -j CLASSIFY --set-class 0:%d -m comment --comment '%s'" local function f(...) iptables(sf(t,...)) end recreate_filter({table="mangle",chain="W80211e"}) iptables("-t mangle -A W80211e -j CLASSIFY --set-class 0:103 -m comment --comment 'Reclassify BE'") f(ds.EF, 106,'Voice (EF)') f(ds.CS6, 106,'Critical (VO)') f(ds.ANT, 105,'Ants(VI)') f(ds.BOFH,105,'Typing (VI)') f(ds.AF41,105,'Net Radio(VI)') f(ds.CS3, 105,'Video (VI)') f(ds.CS1, 101,'Background (BK)') f(ds.CS5, 101,'General Stuff (BK)') f(ds.P2P, 101,'P2P (BK)') f(ds.CS2, 101,'Background (BK)') f(ds.AF33,101,'Background (AF33)') mcast_classify("W80211e","0:103") end -- Iptables wrappers that we need due to lack of filters. Maybe use a -- DEBLOAT chain. It would be good to have a universal number to -- reduce the number of match rules iptables -t mangle -o iface -I -- POSTROUTING -m multicast ! unicast --classify 1:1 local function iptables_probe(iface,rule) end local function iptables_remove(iface,rule) end local function iptables_insert(iface,rule) iptables(sf("-t mangle -o %s -A POSTROUTING -j %s",iface,rule)) end -- Basic SFQ on wireless -- FIXME: We must get ALL multicast out of the other queues -- and into the VO queue. Always. Somehow. -- It also makes sense to do EF into the VO queue and match the -- default behavior inside of the MAC80211 code for scheduling -- purposes. local function wireless_filters() -- FIXME: We need filters to use the various queues -- The only way to get them is to use iptables presently -- and even that's not working end local function wireless_setup(queuetype) qa("handle 1 root mq") qap("1:1 handle %x %s",VO, queuetype) qap("1:2 handle %x %s ",VI, queuetype) qap("1:3 handle %x %s ",BE, queuetype) qap("1:4 handle %x %s",BK, queuetype) wireless_filters() end local function wireless_qlen_change(base) for i=1, # qlens do q = qlens[i] d = sf("%s/%s",base,q) qlen = slurpf(d) if qlen ~= nill and qlen ~= env[q] then spewf(d,env[q]) end end end local function wireless_qlen() -- Sure we could inspect things here but whatever wireless_qlen_change(sf("%s/%s",wireless_debug,"phy0/ath9k")) wireless_qlen_change(sf("%s/%s",wireless_debug,"phy1/ath9k")) wireless_qlen_change(sf("%s/%s",wireless_debug,"phy2/ath9k")) end local function wireless_setup_ll(queuetype) qa("handle 1 root mq") qap("1:1 handle %x %s limit 800 noecn target 10ms quantum 500",VO, queuetype) qap("1:2 handle %x %s limit 800 ecn quantum %d",VI, queuetype, env.CODEL_LL_QUANTUM) qap("1:3 handle %x %s limit 1000 ecn quantum %d",BE, queuetype, env.CODEL_LL_QUANTUM) qap("1:4 handle %x %s limit 1000 noecn ",BK, queuetype) wireless_filters() end -- ingress model -- this is why I wanted my tc things -- to be objects, e.g: -- dev = tc.new() -- dev:qa ifb:qa local function sfqred_ingress() RATE=env.DOWNLINK if RATE == nil then return end kernel_prereqs({"sch_cbq","act_mirred","cls_fw"}) IFB="ifb0" ALLOT=1524 -- not needed? ingress() qa("ingress") fap("ffff: protocol all u32 match u32 0 0 flowid 1:1 action mirred egress redirect dev %s",IFB) -- Lets say our NIC is 100Mbit tc:write(sf("qdisc add dev %s root handle 1: cbq avpkt 1000 rate 100Mbit bandwidth 100Mbit\n",IFB)) -- FIXME: allocate bandwidth from underlying interface tc:write(sf("class add dev %s parent 1: classid 1:1 cbq allot 1524 mpu 64 rate 100Mbit prio 1 bandwidth 100Mbit maxburst 150 avpkt 1500 bounded\n", IFB)) -- Class for traffic coming from Internet : limited to X Mbits tc:write(sf("class add dev %s parent 1:1 classid 1:11 cbq allot %d mpu 64 rate %dkbit bandwidth %dkbit maxburst 80 minburst 40 prio 2 avpkt 1400 bounded\n", IFB, ALLOT, RATE, RATE)) tc:write(sf("qdisc add dev %s parent 1:11 handle 11: sfq limit 300 quantum 1500 headdrop flows 2048 divisor 16384 redflowlimit 60000 min 9000 max 27000 probability 0.20 ecn harddrop\n", IFB)) -- Traffic from machines in our LAN : no limit --for i,privnet in pairs(LOCALNETS) do -- tc:write(sf("filter add dev %s parent 1: protocol ip prio 2 u32 match ip src %s flowid 1:1\n", IFB, privnet)) -- end tc:write(sf("filter add dev %s parent 1: protocol all prio 2 u32 match ip protocol 0 0x00 flowid 1:11\n", IFB)) end -- Various models local function wireless_efq_codel() wireless_setup("efq_codel limit 1000 quantum 1000") end local function wireless_efq_codel_ll() wireless_setup_ll("efq_codel") end local function wireless_nfq_codel() wireless_setup("nfq_codel limit 1000 quantum 1000") end local function wireless_nfq_codel_ll() wireless_setup_ll("nfq_codel") end local function wireless_fq_codel() wireless_setup("fq_codel limit 1000 quantum 1000") end local function wireless_fq_codel_ll() wireless_setup_ll("fq_codel") end local function wireless_codel() wireless_setup("codel limit 1000") end local function wireless_ns2_codel() wireless_setup("ns2_codel limit 1000") end local function wireless_sfq() wireless_setup("sfq limit 40 perturb 6000") end -- erics sfq and erics sfqred with -- some arbitrary speeds and bandwidths (unused) -- TiQ would be better local function wireless_efq() qa("handle 1 root mq") efq("1:1",VO,20,30) efq("1:2",VI,50,20) efq("1:3",BE,150,100) efq("1:4",BK,30,10) wireless_filters() end local function wireless_efqr() qa("handle 1 root mq") efqr("1:1",VO,20,30) efqr("1:2",VI,50,20) efqr("1:3",BE,150,2000) efqr("1:4",BK,30,10) wireless_filters() end -- FIXME: add HTB rate limiter support for a hm gateway -- What we want are various models expressed object orientedly -- so we can tie them together eventually -- This is not that. We ARE trying to get to where the numbering -- schemes are consistent enough to tie everything together -- sanely... local function model_qfq_subdisc(base) cb(base,MULTICAST,env.MDISC) cb(base,DEFAULTB,env.NORMDISC) fa_defb(base) fa_mcast(base); q_bins(base); fa_bins(base); end local function model_choke_subdisc(base) env.BIGDISC=sf("choke bandwidth %dkbit limit 24 min 4 max 12 probability .2 ecn",env.UPLINK) model_qfq_subdisc(base) end local function one_over_ip(base) cb(base,MULTICAST,env.MDISC) cb(base,DEFAULTB,env.NORMDISC) fa_defb(base) fa_mcast(base); q_bins(base); faip_bins(base); end -- DRR local function drr(queues) kernel_prereqs({"sch_drr"}) qa("root handle 1: drr") cap("1: classid 1:%x drr",env.BINS+1) cap("1: classid 1:%x drr",env.BINS+2) qap("1:%x pfifo_head_drop limit 16",env.BINS+1) qap("1:%x pfifo_head_drop limit 16",env.BINS+2) fa_defb(1) fa_mcast(1) for i=1,env.BINS do cap("1: classid 1:%x drr",i) qap("1:%x pfifo_head_drop limit 16",i) end fap("%x: handle 10 protocol all prio 10 flow hash keys proto-src,dst divisor %d perturb 10", 1, env.BINS); end -- Choke local function choke(queues) kernel_prereqs({"sch_choke"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU -- local limit,min,max = chokelimit(up,env.DOWNLINK) qa("root handle 1: choke bandwidth %dkbit limit 32 min 4 max 12 probability .2 ecn", up) end end -- hap -- Using the same parameters as in Section V, we can estimate , queue -- size oscillates the minimal N to be 8.08. When packets, constantly -- turning CHOKe on and off, around as shown in Fig. 9 (compare with -- Fig. 8). When is small, the equilibrium model in Section II no -- longer holds. The same phenomenon is observed when increases (with -- fixed ). The lower bound on dropping probability when CHOKe is -- active, , eventually prevents TCP flows from making full use of the -- available capacity. A positive effect is that the queue length is -- controlled to stay around 20. We have also simulated with more th local function htb_choke(queues) kernel_prereqs({"sch_choke", "sch_htb"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: %s htb default 1", est) cap("1: classid 1:1 %s htb burst 64 rate %dkibit mtu 1500 mpu 64", est, up) -- qap("1:1 handle 10: choke bandwidth %dkbit limit 32 min 4 max 16 probability .2 ecn", -- up) -- at 4Mbit, choke will keep 2 .. 8 packets for min, max if -- you just set the bandwidth -- at 40mbit choke will keep 4..12 -- from the doc, this is bytes. From the code, this turns pkts into avpkts?? qap("1:1 handle 10: choke bandwidth %dkbit limit 24 min 2 max 12 probability .2 ecn", up) end end -- SFB local function sfb(queues) kernel_prereqs({"sch_sfb"}) qa("root handle 1: sfb") end local function htb_sfb(queues) kernel_prereqs({"sch_sfb","sch_htb"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: %s htb default 1", est) cap("1: classid 1:1 %s htb burst 64 rate %dkibit mtu 1500 mpu 64 quantum 1500", est, up) qap("1:1 handle 10: sfb limit 40") end end local function tbf(queues) kernel_prereqs({"sch_tbf"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: %s tbf rate %dkbit burst 5k latency 5ms minburst 1540", est, up) end end local function htb_qfq_sfqred(queues) kernel_prereqs({"sch_sfq","sch_qfq"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: est 1sec 2sec htb default 1") cap("1: classid 1:1 est 1sec 2sec htb burst 64 rate %dkibit mtu 1500 mpu 64 quantum 1500",up) qap("1:1 handle %x qfq",10) -- FIXME, do the calculation for the bandwidth env.BIGDISC="sfq limit 80 headdrop quantum 1500 flows 200 divisor 16384 redflowlimit 9000 min 1500 max 4500 depth 12 probability 0.2 ecn harddrop" -- env.BIGDISC="sfq limit 80" one_over_ip(10) end end local function htb_qfq_drop_head(queues) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: est 1sec 8sec htb default 1") cap("1: classid 1:1 est 1sec 8sec htb rate %dkbit mtu 1500 quantum 1500",up) qap("1:1 handle %x qfq",10) model_qfq_subdisc(10) end end local function htb_qfq_choke(queues) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU qa("root handle 1: est 1sec 8sec htb default 1") cap("1: classid 1:1 est 1sec 8sec htb rate %dkbit mtu 1500 quantum 1500",up) qap("1:1 handle %x qfq",10) model_choke_subdisc(10) end end -- FIXME: Finish this up local function model_qfq_ared(base) cb(base,MULTICAST,env.MDISC) cb(base,DEFAULTB,env.NORMDISC) fa_defb(base) fa_mcast(base); q_bins(base); fa_bins(base); end local function model_qfq_red(base) cb(base,MULTICAST,env.MDISC) cb(base,DEFAULTB,env.NORMDISC) fa_defb(base) fa_mcast(base); q_bins(base); fa_bins(base); end local function model_sfq(base) qa("parent %x sfq",base) end -- Wireless devices are multi-queued - BUT the hardware -- enforces differences in behavior vs the queues -- (actually hostapd does that) -- FIXME: get a grip on lua iterators local function wireless_qfq() wireless_setup("qfq") for i,v in ipairs(WQUEUES) do model_qfq_subdisc(v) end end local function wireless_qfqr() wireless_setup("qfq") for i,v in ipairs(WQUEUES) do model_qfq_ared(v) end end -- FIXME: just stubs for now local function wireless_ared() qa("handle 1 root mq") for i,v in ipairs(WQUEUES) do model_qfq_ared(v) end wireless_filters() end -- FIXME: just stubs for now local function wireless_red() qa("handle 1 root mq") for i,v in ipairs(WQUEUES) do model_qfq_red(v) end wireless_filters() end -- FIXME - mqprio might not be available -- FIXME - rethink multi-queue idea local function ethernet_qfq(queues) c = queues -- for i=0,c do if queues > 1 then qa("handle %x root qfq",10) else qa("handle %x root qfq",10) end model_qfq_subdisc(10) end local function ethernet_efq_codel(queues) qa("handle %x root efq_codel limit 1000 ",10) end local function ethernet_efq_codel_ll(queues) qa("handle %x root efq_codel limit 1000 quantum 1000 ",10) end local function ethernet_nfq_codel(queues) qa("handle %x root nfq_codel limit 1000 ",10) end local function ethernet_nfq_codel_ll(queues) qa("handle %x root nfq_codel limit 1000 quantum 1000 ",10) end local function ethernet_fq_codel(queues) qa("handle %x root fq_codel",10) end local function ethernet_fq_codel_ll(queues) qa("handle %x root fq_codel limit 1000 quantum 1000 ",10) end local function ethernet_codel(queues) qa("handle %x root codel",10) end local function ethernet_ns2_codel(queues) qa("handle %x root ns2_codel",10) end local function ethernet_sfq(queues) qa("handle %x root sfq",10) end local function ethernet_efq(queues) qa("root handle %x est 1sec 8sec sfq limit 2000 depth 24 headdrop flows %d divisor 16384",10, 150) end local function ethernet_efqr(queues) -- FIXME, we can do sane things with speed here qa("root handle %x: est 1sec 4sec sfq limit 300 depth 20 headdrop perturb 60000 flows %d divisor 16384 redflowlimit 32000 min 4500 max 18000 probability 0.20 ecn harddrop",10,2000) end -- FIXME: just stubs for now local function ethernet_ared(queues) qa("handle %x root red",10) end local function ethernet_red(queues) qa("handle %x root red",10) end -- I don't know when a good time to have a larger quantum would be good -- Proto all makes more sense maybe local function fw_fap(parent, class, v, pref) local mask = 0xff -- tc filter add dev ge00 parent 1: prio 4 protocol ip handle 4/0xff fw flowid 1:40 -- fap("%s protocol ip pref %d fw 0x%x/0x%x classid %s", parent,pref, v, mask, class ) -- fap("%s protocol ipv6 pref %d fw 0x%x/0x%x classid %s", parent,pref, v, mask, class fap("%s protocol all pref %d handle %x/0x%x fw flowid %s", parent, pref, v, mask, class ) end -- return min, max local function uplink_to_red(uplink) end local function wireless_sfqr(queues) -- local quantum = rate_quantum(up) -- convert to number local quantum = rate_quantum(4000) local mtu = env.MTU -- local mtu = 1500 -- trying to find an operating point that does useful stuff on -- wireless yet lets packet agg work better local function red2(parent,handle,prio,mark) qap("1:%x handle %x: sfq limit 120 headdrop perturb 60000 flows %d divisor 16384 quantum 3000 depth 24 redflowlimit 60000 min 18000 max 50000 probability 0.20 ecn harddrop", parent, handle, 2000) -- qap("1:%x handle %x: sfq limit 120 headdrop perturb 60000 flows %d divisor 16384 quantum 4500 depth 24 redflowlimit 30000 min 6000 max 18000 probability 0.20 ecn harddrop", parent, handle, 2000) -- fw_fap("1:",sf("1:%x",handle),prio,mark) end qa("root handle 1: mq ") red(1,10,1,1) red(2,20,2,2) red(3,30,3,3) red(4,40,4,4) end local function fourtier(queues) kernel_prereqs({"sch_sfq", "ifb", "act_mirred"}) if env.UPLINK ~= nil then local up=env.UPLINK local quantum = rate_quantum(up) local est = sf("est %dsec %dsec", env.EST_MIN, env.EST_MAX) local mtu = env.MTU -- just straight sfq local function red4(parent,handle,prio,mark) cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d", parent, est, up, mtu, quantum) qap("1:%x handle %x: %s sfq limit 200", parent, handle, est) fw_fap("1:",sf("1:%x",handle),prio,mark) end -- Hammered down on the defaults local function red(parent,handle,prio,mark) cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d", parent, est, up, mtu, quantum) qap("1:%x handle %x: %s sfq limit 200 headdrop quantum 1524 perturb 60000 flows %d divisor 16384 depth 24 redflowlimit 9000 min 1500 max 4500 probability 0.20 ecn harddrop", parent, handle, est, 2000) fw_fap("1:",sf("1:%x",handle),prio,mark) end -- still trying to find a useful operating point local function red2(parent,handle,prio,mark) cap("1: classid 1:%x %s htb rate %skibit mtu %d mpu 64 quantum %d", parent, est, up, mtu, quantum) qap("1:%x handle %x: %s sfq limit 200 headdrop quantum 1524 perturb 60000 flows %d divisor 16384 depth 24 redflowlimit 40000 min 4500 max 9000 probability 0.20 ecn harddrop", parent, handle, est, 2000) fw_fap("1:",sf("1:%x",handle),prio,mark) end qa("root handle 1: %s htb default 1", est) red(1,10,1,1) red(2,20,2,2) red(3,30,3,3) red(4,40,4,4) if env.DOWNLINK ~= nil and env.INGRESS ~= nil then sfqred_ingress() end end end -- Openwrt emulation -- The current openwrt shaper uses a combination of -- HFSC, SFQ, and RED function tcrules(cstr,rate) dir="/usr/lib/aqm" if file(sf("%s/tcrules.awk")) then spewc(sf("awk -v device=\"%s\" -v linespeed=\"%s\" -f %s/tcrules.awk",env.IFACE,rate,dir), cstr) end end function oopenwrt(queues) if env.UPLINK == nil or env.DOWNLINK == nil then usage("UPLINK and DOWNLINK environment variables are required") end local ul = env.UPLINK qa("root handle 1: hfsc default 3") cap("1: classid 1:1 hfsc sc rate %dkbit ul rate %dkbit",ul,ul) usage("Not complete yet") if env.DOWNLOAD then qa("ingress") fap("ffff: protocol ip prio 1 u32 match u32 0 0 flowid 1:1 action connmark action mirred egress redirect dev ifb%d",ifbdev,n) end end -- Wondershaper emulation local function unsupported_shaper(queues) usage("Shaper not supported for wireless") end -- The wondershaper -- This is improved over the original wondershaper with -- Correct ECN support -- FIXME: IPv6 support -- low priority source netmasks -- NOPRIOHOSTSRC= -- low priority destination netmasks -- NOPRIOHOSTDST= -- low priority source ports -- NOPRIOPORTSRC= -- low priority destination ports -- NOPRIOPORTDST="21" local function wshaper(queues) if env.UPLINK == nil or env.DOWNLINK == nil then usage("UPLINK and DOWNLINK environment variables are required") end local UPLINK = env.UPLINK local DOWNLINK = env.DOWNLINK local v6 = env.IPV6 qa("root handle 1: htb default 20") -- shape everything at UPLINK speed - this prevents huge queues in -- your next hop gateway which destroy latency: cap("1: classid 1:1 htb rate %skbit burst 6k",UPLINK) cap("1:1 classid 1:10 htb rate %dkbit burst 6k prio 1",UPLINK) cap("1:1 classid 1:20 htb rate %dkbit burst 6k prio 2",round(9*UPLINK/10)) cap("1:1 classid 1:30 htb rate %dkbit burst 6k prio 2",round(8*UPLINK/10)) -- all get Stochastic Fairness: -- Note the perturb option is rather excessive now qap("1:10 handle 10: sfq perturb 10") qap("1:20 handle 20: sfq perturb 10") qap("1:30 handle 30: sfq perturb 10") -- TOS Minimum Delay (ssh, NOT scp) in 1:10: fap("1:0 protocol ip prio 10 u32 match ip tos 0x10 0x%x flowid 1:10",env.ECNMASK) -- ICMP (ip protocol 1) in the interactive class 1:10 so we -- can do measurements & impress our friends: pingopt("1:0","1:10") -- This is just so wrong in so many cases: -- To speed up downloads while an upload is going on, put ACK packets -- in the interactive class: fap([[1: protocol ip prio 10 u32 \ match ip protocol 6 0xff \ match u8 0x05 0x0f at 0 \ match u16 0x0000 0xffc0 at 2 \ match u8 0x10 0xff at 33 flowid 1:10]]) -- There is no IPv6 support in old wondershaper -- FIXME - figure out how to find acks in ipv6 -- some traffic however suffers a worse fate local function np(args,prio,match,h) for i,v in pairs(args:split(" ")) do fap("1: protocol ip prio %d u32 \ match ip %s %s %s flowid 1:30",prio,match,v, h) if v6 then -- fap("1: protocol ipv6 prio %d u32 \ -- match ip %s %s %s flowid 1:30",prio,match,v, h) end end end if env.NOPRIOPORTDST ~= nil then np(env.NOPRIOPORTDST,14,"dport","0xffff") end if env.NOPRIOPORTSRC ~= nil then np(env.NOPRIOPORTSRC,15,"sport", "0xffff") end if env.NOPRIOHOSTSRC ~= nil then np(env.NOPRIOHOSTSRC,16,"src"," ") end if env.NOPRIOHOSTDST ~= nil then np(env.NOPRIOHOSTDST,17,"dst"," ") end -- rest is 'non-interactive' ie 'bulk' and ends up in 1:20 fap("1: protocol ip prio 18 u32 match ip dst 0.0.0.0/0 flowid 1:20") -------------------- downlink -------------------------- -- slow downloads down to somewhat less than the real speed to prevent -- queuing at our ISP. Tune to see how high you can set it. ISPs tend -- to have *huge* queues to make sure big downloads are fast -- -- attach ingress policer: qa("handle ffff: ingress") -- filter *everything* to it (0.0.0.0/0), drop everything that's -- coming in too fast: fap("ffff: protocol ip prio 50 u32 match ip src 0.0.0.0/0 police rate %dkbit burst 10k drop flowid :1",DOWNLINK) end -- For reference, this is a slavish re-implementation of the original -- wondershaper, bugs with ecn, and ipv6 included -- The new SFQ should also improve it. -- That said, it is seriously flawed and I'd -- like to be able to fully model it's behavior, -- hence this implementation. local function owshaper(queues) env.ECNMASK = 0xff -- original wshaper screws up on ecn env.IPV6 = false return wshaper(queues) end -- I have to think about the calculations for 100Mbit and below... -- FIXME: Think on the architecture and models harder -- first. Need to also be able to stick HSFC, netem, or HTB -- on top of this WCALLBACKS = { ["qfq"] = wireless_qfq, ["qfqred"] = wireless_qfqr, ["red"] = wireless_red, ["ared"] = wireless_ared, ["sfq"] = wireless_sfq, ["efq"] = wireless_efq, ["sfqred"] = wireless_efqr, ["sfqr"] = wireless_sfqr, ["codel"] = wireless_codel, ["fq_codel"] = wireless_fq_codel, ["fq_codel_ll"] = wireless_fq_codel_ll, ["nfq_codel"] = wireless_nfq_codel, ["nfq_codel_ll"] = wireless_nfq_codel_ll, ["efq_codel"] = wireless_efq_codel, ["efq_codel_ll"] = wireless_efq_codel_ll, ["ns2_codel"] = wireless_ns2_codel, ["ns2_codel_ll"] = wireless_ns2_codel_ll, ["htb_sfq_red"] = unsupported_shaper, ["oopenwrt"] = unsupported_shaper, ["owshaper"] = unsupported_shaper, ["wshaper"] = wireless_wshaper, ["twotier"] = unsupported_shaper, ["fourtier"] = unsupported_shaper } ECALLBACKS = { ["qfq"] = ethernet_qfq, ["qfqred"] = ethernet_qfqr, ["red"] = ethernet_red, ["ared"] = ethernet_ared, ["sfq"] = ethernet_sfq, ["codel"] = ethernet_codel, ["ns2_codel"] = ethernet_ns2_codel, ["fq_codel"] = ethernet_fq_codel, ["fq_codel_ll"] = ethernet_fq_codel_ll, ["nfq_codel"] = ethernet_nfq_codel, ["nfq_codel_ll"] = ethernet_nfq_codel_ll, ["efq_codel"] = ethernet_efq_codel, ["efq_codel_ll"] = ethernet_efq_codel_ll, ["efq"] = ethernet_efq, ["sfqred"] = ethernet_efqr, ["htb_qfq_sfqred"] = htb_qfq_sfqred, ["htb_qfq_drop_head"] = htb_qfq_drop_head, ["htb_sfq_red"] = htb_sfq_red, ["htb_sfq_red2"] = htb_sfq_red2, ["htb_sfq_red_cero"] = htb_sfq_red_cero, ["tbf"] = tbf, ["htb_sfb"] = htb_sfb, ["choke"] = choke, ["htb_choke"] = htb_choke, ["htb_qfq_choke"] = htb_qfq_choke, ["sfb"] = sfb, ["drr"] = drr, ["oopenwrt"] = oopenwrt, ["wshaper"] = wshaper, ["owshaper"] = owshaper, ["owshaper"] = owshaper, ["twotier"] = twotier, ["fourtier"] = fourtier, } -- couple other models - dsl, wshaper, etc, needed -- pingopt and argv processing too local function wireless(model) print(model) if WCALLBACKS[model] ~= nil then wireless_qlen() return WCALLBACKS[model]() else usage("AQM model not found") end return nil end local function ethernet(model) if ECALLBACKS[model] ~= nil then return ECALLBACKS[model](ethernet_setup(IFACE)) else usage("AQM model not found") end return nil end -- It's annoying to get deletion as an error -- And confusing to the user if env.PINGOPT then pingopt = function(parent,flow) fap("%s protocol ip prio 10 u32 match ip protocol 1 0xff flowid %s",parent,flow) if(env.IPV6) then -- arguably we should only match echo and echo reply but... fap("%s protocol ipv6 prio 10 u32 match ip protocol 1 0xff flowid %s",parent,flow) end end end if env.QDEBUG then env.TC="/bin/cat" env.TCARG=" " env.DEBLOATLOG="/tmp/debloat.log" end -- FIXME - do something intelligent when faced with a bridge or vlan itype=interface_type(IFACE) if itype == 'wireless' or itype == 'ethernet' then kernel_prereqs(PREREQS) tc = resettc() if itype == 'wireless' then wireless(env.QMODEL) -- FIXME: you watch this code set the class, then not show up in tc -- mac80211e() -- iptables_insert(IFACE,"W80211e") end if itype == 'ethernet' then ethernet(env.QMODEL) end end