summaryrefslogtreecommitdiffstats
path: root/bcm283x-Pull-upstream-fixes-plus-iproc-mmc-driver.patch
blob: 56f2a39816fadbe68cd6f9c43772b26a12b4b692 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492
3493
3494
3495
3496
3497
3498
3499
3500
3501
3502
3503
3504
3505
3506
3507
3508
3509
3510
3511
3512
3513
3514
3515
3516
3517
3518
3519
3520
3521
3522
3523
3524
3525
3526
3527
3528
3529
3530
3531
3532
3533
3534
3535
3536
3537
3538
3539
3540
3541
3542
3543
3544
3545
3546
3547
3548
3549
3550
3551
3552
3553
3554
3555
3556
3557
3558
3559
3560
3561
3562
3563
3564
3565
3566
3567
3568
3569
3570
3571
3572
3573
3574
3575
3576
3577
3578
3579
3580
3581
3582
3583
3584
3585
3586
3587
3588
3589
3590
3591
3592
3593
3594
3595
3596
3597
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607
3608
3609
3610
3611
3612
3613
3614
From 735f01326873349426f041a4fa2f5703a1ed43a4 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 5 Feb 2016 15:06:15 -0800
Subject: [PATCH 01/36] drm/vc4: Fix a framebuffer reference leak on async flip
 interrupt.

We'd need X to queue up an async pageflip while another is
outstanding, and then take a SIGIO.  I think X actually avoids sending
out the next pageflip while one's already queued, but I'm not sure.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 018145e..989ee72 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -544,6 +544,7 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 	/* Make sure all other async modesetes have landed. */
 	ret = down_interruptible(&vc4->async_modeset);
 	if (ret) {
+		drm_framebuffer_unreference(fb);
 		kfree(flip_state);
 		return ret;
 	}
-- 
2.7.3

From e1ceac2cefbda12d1d9d9ee547fc0cc8bfeebde6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Feb 2016 14:15:14 -0800
Subject: [PATCH 02/36] drm/vc4: Bring HDMI up from power off if necessary.

If the firmware hadn't brought up HDMI for us, we need to do its
power-on reset sequence (reset HD and and clear its STANDBY bits,
reset HDMI, and leave the PHY disabled).

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 29 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/vc4/vc4_regs.h |  2 ++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index c69c046..6e55760 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -495,6 +495,16 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 		goto err_put_i2c;
 	}
 
+	/* This is the rate that is set by the firmware.  The number
+	 * needs to be a bit higher than the pixel clock rate
+	 * (generally 148.5Mhz).
+	 */
+	ret = clk_set_rate(hdmi->hsm_clock, 163682864);
+	if (ret) {
+		DRM_ERROR("Failed to set HSM clock rate: %d\n", ret);
+		goto err_unprepare_pix;
+	}
+
 	ret = clk_prepare_enable(hdmi->hsm_clock);
 	if (ret) {
 		DRM_ERROR("Failed to turn on HDMI state machine clock: %d\n",
@@ -516,7 +526,24 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	vc4->hdmi = hdmi;
 
 	/* HDMI core must be enabled. */
-	WARN_ON_ONCE((HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE) == 0);
+	if (!(HD_READ(VC4_HD_M_CTL) & VC4_HD_M_ENABLE)) {
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_SW_RST);
+		udelay(1);
+		HD_WRITE(VC4_HD_M_CTL, 0);
+
+		HD_WRITE(VC4_HD_M_CTL, VC4_HD_M_ENABLE);
+
+		HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL,
+			   VC4_HDMI_SW_RESET_HDMI |
+			   VC4_HDMI_SW_RESET_FORMAT_DETECT);
+
+		HDMI_WRITE(VC4_HDMI_SW_RESET_CONTROL, 0);
+
+		/* PHY should be in reset, like
+		 * vc4_hdmi_encoder_disable() does.
+		 */
+		HDMI_WRITE(VC4_HDMI_TX_PHY_RESET_CTL, 0xf << 16);
+	}
 
 	drm_encoder_init(drm, hdmi->encoder, &vc4_hdmi_encoder_funcs,
 			 DRM_MODE_ENCODER_TMDS, NULL);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 4e52a0a..85c36d2 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -456,6 +456,8 @@
 #define VC4_HDMI_TX_PHY_RESET_CTL		0x2c0
 
 #define VC4_HD_M_CTL				0x00c
+# define VC4_HD_M_REGISTER_FILE_STANDBY		(3 << 6)
+# define VC4_HD_M_RAM_STANDBY			(3 << 4)
 # define VC4_HD_M_SW_RST			BIT(2)
 # define VC4_HD_M_ENABLE			BIT(0)
 
-- 
2.7.3

From 63d38d99739736480b24c9f9bd7880ce4e49eb0c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 12 Feb 2016 15:16:56 -0800
Subject: [PATCH 03/36] drm/vc4: Add another reg to HDMI debug dumping.

This is also involved in the HDMI setup sequence so it's nice to see
it.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 6e55760..56272ca 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -95,6 +95,7 @@ static const struct {
 	HDMI_REG(VC4_HDMI_SW_RESET_CONTROL),
 	HDMI_REG(VC4_HDMI_HOTPLUG_INT),
 	HDMI_REG(VC4_HDMI_HOTPLUG),
+	HDMI_REG(VC4_HDMI_RAM_PACKET_CONFIG),
 	HDMI_REG(VC4_HDMI_HORZA),
 	HDMI_REG(VC4_HDMI_HORZB),
 	HDMI_REG(VC4_HDMI_FIFO_CTL),
-- 
2.7.3

From 46e96facb9b67486285c26f88ee747b8d9f4abc9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Feb 2016 17:06:02 -0800
Subject: [PATCH 04/36] drm/vc4: Fix the name of the VSYNCD_EVEN register.

It's used for delaying vsync in interlaced mode.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 2 +-
 drivers/gpu/drm/vc4/vc4_regs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 989ee72..5e84be2 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -83,7 +83,7 @@ static const struct {
 } crtc_regs[] = {
 	CRTC_REG(PV_CONTROL),
 	CRTC_REG(PV_V_CONTROL),
-	CRTC_REG(PV_VSYNCD),
+	CRTC_REG(PV_VSYNCD_EVEN),
 	CRTC_REG(PV_HORZA),
 	CRTC_REG(PV_HORZB),
 	CRTC_REG(PV_VERTA),
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 85c36d2..d529665 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -187,7 +187,7 @@
 # define PV_VCONTROL_CONTINUOUS			BIT(1)
 # define PV_VCONTROL_VIDEN			BIT(0)
 
-#define PV_VSYNCD				0x08
+#define PV_VSYNCD_EVEN				0x08
 
 #define PV_HORZA				0x0c
 # define PV_HORZA_HBP_MASK			VC4_MASK(31, 16)
-- 
2.7.3

From baff41935a7b4c1b6015a99a0ca222fd0a5552b9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Feb 2016 17:31:41 -0800
Subject: [PATCH 05/36] drm/vc4: Fix setting of vertical timings in the CRTC.

It looks like when I went to add the interlaced bits, I just took the
existing PV_VERT* block and indented it, instead of copy and pasting
it first.  Without this, changing resolution never worked.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 5e84be2..93d53c2 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -212,6 +212,16 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 				 PV_HORZB_HFP) |
 		   VC4_SET_FIELD(mode->hdisplay, PV_HORZB_HACTIVE));
 
+	CRTC_WRITE(PV_VERTA,
+		   VC4_SET_FIELD(mode->vtotal - mode->vsync_end,
+				 PV_VERTA_VBP) |
+		   VC4_SET_FIELD(mode->vsync_end - mode->vsync_start,
+				 PV_VERTA_VSYNC));
+	CRTC_WRITE(PV_VERTB,
+		   VC4_SET_FIELD(mode->vsync_start - mode->vdisplay,
+				 PV_VERTB_VFP) |
+		   VC4_SET_FIELD(vactive, PV_VERTB_VACTIVE));
+
 	if (interlace) {
 		CRTC_WRITE(PV_VERTA_EVEN,
 			   VC4_SET_FIELD(mode->vtotal - mode->vsync_end - 1,
-- 
2.7.3

From 6f7cde6ad6e866660b8e5607a213872e5f34e8fd Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 16 Feb 2016 10:24:08 -0800
Subject: [PATCH 06/36] drm/vc4: Initialize scaler DISPBKGND on modeset.

We weren't updating the interlaced bit, so we'd scan out incorrectly
if the firmware had brought up the TV encoder and we were switching to
HDMI.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_crtc.c |  6 ++++++
 drivers/gpu/drm/vc4/vc4_regs.h | 14 ++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 93d53c2..6ae5abc 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -183,6 +183,8 @@ static int vc4_get_clock_select(struct drm_crtc *crtc)
 
 static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
+	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	struct drm_crtc_state *state = crtc->state;
 	struct drm_display_mode *mode = &state->adjusted_mode;
@@ -251,6 +253,10 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 		   PV_CONTROL_FIFO_CLR |
 		   PV_CONTROL_EN);
 
+	HVS_WRITE(SCALER_DISPBKGNDX(vc4_crtc->channel),
+		  SCALER_DISPBKGND_AUTOHS |
+		  (interlace ? SCALER_DISPBKGND_INTERLACE : 0));
+
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d regs after:\n", drm_crtc_index(crtc));
 		vc4_crtc_dump_regs(vc4_crtc);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index d529665..7c29993 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -350,6 +350,17 @@
 # define SCALER_DISPCTRLX_HEIGHT_SHIFT		0
 
 #define SCALER_DISPBKGND0                       0x00000044
+# define SCALER_DISPBKGND_AUTOHS		BIT(31)
+# define SCALER_DISPBKGND_INTERLACE		BIT(30)
+# define SCALER_DISPBKGND_GAMMA			BIT(29)
+# define SCALER_DISPBKGND_TESTMODE_MASK		VC4_MASK(28, 25)
+# define SCALER_DISPBKGND_TESTMODE_SHIFT	25
+/* Enables filling the scaler line with the RGB value in the low 24
+ * bits before compositing.  Costs cycles, so should be skipped if
+ * opaque display planes will cover everything.
+ */
+# define SCALER_DISPBKGND_FILL			BIT(24)
+
 #define SCALER_DISPSTAT0                        0x00000048
 #define SCALER_DISPBASE0                        0x0000004c
 # define SCALER_DISPSTATX_MODE_MASK		VC4_MASK(31, 30)
@@ -362,6 +373,9 @@
 # define SCALER_DISPSTATX_EMPTY			BIT(28)
 #define SCALER_DISPCTRL1                        0x00000050
 #define SCALER_DISPBKGND1                       0x00000054
+#define SCALER_DISPBKGNDX(x)			(SCALER_DISPBKGND0 +        \
+						 (x) * (SCALER_DISPBKGND1 - \
+							SCALER_DISPBKGND0))
 #define SCALER_DISPSTAT1                        0x00000058
 #define SCALER_DISPSTATX(x)			(SCALER_DISPSTAT0 +        \
 						 (x) * (SCALER_DISPSTAT1 - \
-- 
2.7.3

From 449c91f1f06a573ad4a3edd18d7b493bf44478f6 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 28 Dec 2015 14:14:09 -0800
Subject: [PATCH 07/36] drm/vc4: Improve comments on vc4_plane_state members.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 0addbad..45e353d 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -26,16 +26,19 @@
 
 struct vc4_plane_state {
 	struct drm_plane_state base;
+	/* System memory copy of the display list for this element, computed
+	 * at atomic_check time.
+	 */
 	u32 *dlist;
-	u32 dlist_size; /* Number of dwords in allocated for the display list */
+	u32 dlist_size; /* Number of dwords allocated for the display list */
 	u32 dlist_count; /* Number of used dwords in the display list. */
 
 	/* Offset in the dlist to pointer word 0. */
 	u32 pw0_offset;
 
 	/* Offset where the plane's dlist was last stored in the
-	   hardware at vc4_crtc_atomic_flush() time.
-	*/
+	 * hardware at vc4_crtc_atomic_flush() time.
+	 */
 	u32 *hw_dlist;
 };
 
-- 
2.7.3

From 4c8b2ce80659e1c7a75b7b54430dab320aeb440b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 28 Dec 2015 14:14:57 -0800
Subject: [PATCH 08/36] drm/vc4: Add missing __iomem annotation to hw_dlist.

This is the pointer to the HVS device's memory where we stored the
contents of *dlist.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 45e353d..ed07ee5 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -39,7 +39,7 @@ struct vc4_plane_state {
 	/* Offset where the plane's dlist was last stored in the
 	 * hardware at vc4_crtc_atomic_flush() time.
 	 */
-	u32 *hw_dlist;
+	u32 __iomem *hw_dlist;
 };
 
 static inline struct vc4_plane_state *
-- 
2.7.3

From f792f380190638916b495f3051547a849fc97fd2 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 28 Dec 2015 14:34:44 -0800
Subject: [PATCH 09/36] drm/vc4: Move the plane clipping/scaling setup to a
 separate function.

As we add actual scaling, this is going to get way more complicated.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 78 +++++++++++++++++++++++++++--------------
 1 file changed, 52 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index ed07ee5..554ed54 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -40,6 +40,14 @@ struct vc4_plane_state {
 	 * hardware at vc4_crtc_atomic_flush() time.
 	 */
 	u32 __iomem *hw_dlist;
+
+	/* Clipped coordinates of the plane on the display. */
+	int crtc_x, crtc_y, crtc_w, crtc_h;
+
+	/* Offset to start scanning out from the start of the plane's
+	 * BO.
+	 */
+	u32 offset;
 };
 
 static inline struct vc4_plane_state *
@@ -151,22 +159,17 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
 	vc4_state->dlist[vc4_state->dlist_count++] = val;
 }
 
-/* Writes out a full display list for an active plane to the plane's
- * private dlist state.
- */
-static int vc4_plane_mode_set(struct drm_plane *plane,
-			      struct drm_plane_state *state)
+static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 {
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
-	u32 ctl0_offset = vc4_state->dlist_count;
-	const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
-	uint32_t offset = fb->offsets[0];
-	int crtc_x = state->crtc_x;
-	int crtc_y = state->crtc_y;
-	int crtc_w = state->crtc_w;
-	int crtc_h = state->crtc_h;
+
+	vc4_state->offset = fb->offsets[0];
+
+	vc4_state->crtc_x = state->crtc_x;
+	vc4_state->crtc_y = state->crtc_y;
+	vc4_state->crtc_w = state->crtc_w;
+	vc4_state->crtc_h = state->crtc_h;
 
 	if (state->crtc_w << 16 != state->src_w ||
 	    state->crtc_h << 16 != state->src_h) {
@@ -178,18 +181,41 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 		return -EINVAL;
 	}
 
-	if (crtc_x < 0) {
-		offset += drm_format_plane_cpp(fb->pixel_format, 0) * -crtc_x;
-		crtc_w += crtc_x;
-		crtc_x = 0;
+	if (vc4_state->crtc_x < 0) {
+		vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
+							   0) *
+				      -vc4_state->crtc_x);
+		vc4_state->crtc_w += vc4_state->crtc_x;
+		vc4_state->crtc_x = 0;
 	}
 
-	if (crtc_y < 0) {
-		offset += fb->pitches[0] * -crtc_y;
-		crtc_h += crtc_y;
-		crtc_y = 0;
+	if (vc4_state->crtc_y < 0) {
+		vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y;
+		vc4_state->crtc_h += vc4_state->crtc_y;
+		vc4_state->crtc_y = 0;
 	}
 
+	return 0;
+}
+
+
+/* Writes out a full display list for an active plane to the plane's
+ * private dlist state.
+ */
+static int vc4_plane_mode_set(struct drm_plane *plane,
+			      struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
+	u32 ctl0_offset = vc4_state->dlist_count;
+	const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
+	int ret;
+
+	ret = vc4_plane_setup_clipping_and_scaling(state);
+	if (ret)
+		return ret;
+
 	vc4_dlist_write(vc4_state,
 			SCALER_CTL0_VALID |
 			(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
@@ -199,8 +225,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	/* Position Word 0: Image Positions and Alpha Value */
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) |
-			VC4_SET_FIELD(crtc_x, SCALER_POS0_START_X) |
-			VC4_SET_FIELD(crtc_y, SCALER_POS0_START_Y));
+			VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
+			VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
 
 	/* Position Word 1: Scaled Image Dimensions.
 	 * Skipped due to SCALER_CTL0_UNITY scaling.
@@ -212,8 +238,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
 				      SCALER_POS2_ALPHA_MODE_FIXED,
 				      SCALER_POS2_ALPHA_MODE) |
-			VC4_SET_FIELD(crtc_w, SCALER_POS2_WIDTH) |
-			VC4_SET_FIELD(crtc_h, SCALER_POS2_HEIGHT));
+			VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) |
+			VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT));
 
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
@@ -221,7 +247,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	vc4_state->pw0_offset = vc4_state->dlist_count;
 
 	/* Pointer Word 0: RGB / Y Pointer */
-	vc4_dlist_write(vc4_state, bo->paddr + offset);
+	vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset);
 
 	/* Pointer Context Word 0: Written by the HVS */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
-- 
2.7.3

From 696f1db279f08e58bc94172818209d5914e0e2d8 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 30 Dec 2015 11:50:22 -0800
Subject: [PATCH 10/36] drm/vc4: Add a proper short-circut path for legacy
 cursor updates.

Previously, on every modeset we would allocate new display list
memory, recompute changed planes, write all of them to the new memory,
and pointed scanout at the new list (which will latch approximately at
the next line of scanout).  We let
drm_atomic_helper_wait_for_vblanks() decide whether we needed to wait
for a vblank after a modeset before cleaning up the old state and
letting the next modeset proceed, and on legacy cursor updates we
wouldn't wait.  If you moved the cursor fast enough, we could
potentially wrap around the display list memory area and overwrite the
existing display list while it was still being scanned out, resulting
in the HVS scanning out garbage or just halting.

Instead of making cursor updates wait for scanout to move to the new
display list area (which introduces significant cursor lag in X), we
just rewrite our current display list.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_kms.c   |  9 ++++
 drivers/gpu/drm/vc4/vc4_plane.c | 94 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 96 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index f95f2df..4718ae5 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -49,6 +49,15 @@ vc4_atomic_complete_commit(struct vc4_commit *c)
 
 	drm_atomic_helper_commit_modeset_enables(dev, state);
 
+	/* Make sure that drm_atomic_helper_wait_for_vblanks()
+	 * actually waits for vblank.  If we're doing a full atomic
+	 * modeset (as opposed to a vc4_update_plane() short circuit),
+	 * then we need to wait for scanout to be done with our
+	 * display lists before we free it and potentially reallocate
+	 * and overwrite the dlist memory with a new modeset.
+	 */
+	state->legacy_cursor_update = false;
+
 	drm_atomic_helper_wait_for_vblanks(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 554ed54..713ec00 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -33,8 +33,12 @@ struct vc4_plane_state {
 	u32 dlist_size; /* Number of dwords allocated for the display list */
 	u32 dlist_count; /* Number of used dwords in the display list. */
 
-	/* Offset in the dlist to pointer word 0. */
-	u32 pw0_offset;
+	/* Offset in the dlist to various words, for pageflip or
+	 * cursor updates.
+	 */
+	u32 pos0_offset;
+	u32 pos2_offset;
+	u32 ptr0_offset;
 
 	/* Offset where the plane's dlist was last stored in the
 	 * hardware at vc4_crtc_atomic_flush() time.
@@ -223,6 +227,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 			SCALER_CTL0_UNITY);
 
 	/* Position Word 0: Image Positions and Alpha Value */
+	vc4_state->pos0_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(0xff, SCALER_POS0_FIXED_ALPHA) |
 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
@@ -233,6 +238,7 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	 */
 
 	/* Position Word 2: Source Image Size, Alpha Mode */
+	vc4_state->pos2_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(format->has_alpha ?
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
@@ -244,9 +250,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
-	vc4_state->pw0_offset = vc4_state->dlist_count;
-
 	/* Pointer Word 0: RGB / Y Pointer */
+	vc4_state->ptr0_offset = vc4_state->dlist_count;
 	vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset);
 
 	/* Pointer Context Word 0: Written by the HVS */
@@ -332,13 +337,13 @@ void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
 	 * scanout will start from this address as soon as the FIFO
 	 * needs to refill with pixels.
 	 */
-	writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]);
+	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
 
 	/* Also update the CPU-side dlist copy, so that any later
 	 * atomic updates that don't do a new modeset on our plane
 	 * also use our updated address.
 	 */
-	vc4_state->dlist[vc4_state->pw0_offset] = addr;
+	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
 }
 
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
@@ -354,8 +359,83 @@ static void vc4_plane_destroy(struct drm_plane *plane)
 	drm_plane_cleanup(plane);
 }
 
+/* Implements immediate (non-vblank-synced) updates of the cursor
+ * position, or falls back to the atomic helper otherwise.
+ */
+static int
+vc4_update_plane(struct drm_plane *plane,
+		 struct drm_crtc *crtc,
+		 struct drm_framebuffer *fb,
+		 int crtc_x, int crtc_y,
+		 unsigned int crtc_w, unsigned int crtc_h,
+		 uint32_t src_x, uint32_t src_y,
+		 uint32_t src_w, uint32_t src_h)
+{
+	struct drm_plane_state *plane_state;
+	struct vc4_plane_state *vc4_state;
+
+	if (plane != crtc->cursor)
+		goto out;
+
+	plane_state = plane->state;
+	vc4_state = to_vc4_plane_state(plane_state);
+
+	if (!plane_state)
+		goto out;
+
+	/* If we're changing the cursor contents, do that in the
+	 * normal vblank-synced atomic path.
+	 */
+	if (fb != plane_state->fb)
+		goto out;
+
+	/* No configuring new scaling in the fast path. */
+	if (crtc_w != plane_state->crtc_w ||
+	    crtc_h != plane_state->crtc_h ||
+	    src_w != plane_state->src_w ||
+	    src_h != plane_state->src_h) {
+		goto out;
+	}
+
+	/* Set the cursor's position on the screen.  This is the
+	 * expected change from the drm_mode_cursor_universal()
+	 * helper.
+	 */
+	plane_state->crtc_x = crtc_x;
+	plane_state->crtc_y = crtc_y;
+
+	/* Allow changing the start position within the cursor BO, if
+	 * that matters.
+	 */
+	plane_state->src_x = src_x;
+	plane_state->src_y = src_y;
+
+	/* Update the display list based on the new crtc_x/y. */
+	vc4_plane_atomic_check(plane, plane_state);
+
+	/* Note that we can't just call vc4_plane_write_dlist()
+	 * because that would smash the context data that the HVS is
+	 * currently using.
+	 */
+	writel(vc4_state->dlist[vc4_state->pos0_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
+	writel(vc4_state->dlist[vc4_state->pos2_offset],
+	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
+	writel(vc4_state->dlist[vc4_state->ptr0_offset],
+	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
+
+	return 0;
+
+out:
+	return drm_atomic_helper_update_plane(plane, crtc, fb,
+					      crtc_x, crtc_y,
+					      crtc_w, crtc_h,
+					      src_x, src_y,
+					      src_w, src_h);
+}
+
 static const struct drm_plane_funcs vc4_plane_funcs = {
-	.update_plane = drm_atomic_helper_update_plane,
+	.update_plane = vc4_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.destroy = vc4_plane_destroy,
 	.set_property = NULL,
-- 
2.7.3

From cd30019db690e3a92fe5d7d771352f118a105f82 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 28 Dec 2015 13:25:41 -0800
Subject: [PATCH 11/36] drm/vc4: Make the CRTCs cooperate on allocating display
 lists.

So far, we've only ever lit up one CRTC, so this has been fine.  To
extend to more displays or more planes, we need to make sure we don't
run our display lists into each other.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_crtc.c | 115 +++++++++++++++++++++++------------------
 drivers/gpu/drm/vc4/vc4_drv.h  |   8 ++-
 drivers/gpu/drm/vc4/vc4_hvs.c  |  13 +++++
 3 files changed, 84 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 6ae5abc..9032c06 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -49,22 +49,27 @@ struct vc4_crtc {
 	/* Which HVS channel we're using for our CRTC. */
 	int channel;
 
-	/* Pointer to the actual hardware display list memory for the
-	 * crtc.
-	 */
-	u32 __iomem *dlist;
-
-	u32 dlist_size; /* in dwords */
-
 	struct drm_pending_vblank_event *event;
 };
 
+struct vc4_crtc_state {
+	struct drm_crtc_state base;
+	/* Dlist area for this CRTC configuration. */
+	struct drm_mm_node mm;
+};
+
 static inline struct vc4_crtc *
 to_vc4_crtc(struct drm_crtc *crtc)
 {
 	return (struct vc4_crtc *)crtc;
 }
 
+static inline struct vc4_crtc_state *
+to_vc4_crtc_state(struct drm_crtc_state *crtc_state)
+{
+	return (struct vc4_crtc_state *)crtc_state;
+}
+
 struct vc4_crtc_data {
 	/* Which channel of the HVS this pixelvalve sources from. */
 	int hvs_channel;
@@ -335,11 +340,13 @@ static void vc4_crtc_enable(struct drm_crtc *crtc)
 static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
 				 struct drm_crtc_state *state)
 {
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_plane *plane;
-	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	unsigned long flags;
 	u32 dlist_count = 0;
+	int ret;
 
 	/* The pixelvalve can only feed one encoder (and encoders are
 	 * 1:1 with connectors.)
@@ -362,18 +369,12 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
 
 	dlist_count++; /* Account for SCALER_CTL0_END. */
 
-	if (!vc4_crtc->dlist || dlist_count > vc4_crtc->dlist_size) {
-		vc4_crtc->dlist = ((u32 __iomem *)vc4->hvs->dlist +
-				   HVS_BOOTLOADER_DLIST_END);
-		vc4_crtc->dlist_size = ((SCALER_DLIST_SIZE >> 2) -
-					HVS_BOOTLOADER_DLIST_END);
-
-		if (dlist_count > vc4_crtc->dlist_size) {
-			DRM_DEBUG_KMS("dlist too large for CRTC (%d > %d).\n",
-				      dlist_count, vc4_crtc->dlist_size);
-			return -EINVAL;
-		}
-	}
+	spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+	ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
+				 dlist_count, 1, 0);
+	spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -384,47 +385,29 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
 	struct drm_plane *plane;
 	bool debug_dump_regs = false;
-	u32 __iomem *dlist_next = vc4_crtc->dlist;
+	u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
+	u32 __iomem *dlist_next = dlist_start;
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS before:\n", drm_crtc_index(crtc));
 		vc4_hvs_dump_state(dev);
 	}
 
-	/* Copy all the active planes' dlist contents to the hardware dlist.
-	 *
-	 * XXX: If the new display list was large enough that it
-	 * overlapped a currently-read display list, we need to do
-	 * something like disable scanout before putting in the new
-	 * list.  For now, we're safe because we only have the two
-	 * planes.
-	 */
+	/* Copy all the active planes' dlist contents to the hardware dlist. */
 	drm_atomic_crtc_for_each_plane(plane, crtc) {
 		dlist_next += vc4_plane_write_dlist(plane, dlist_next);
 	}
 
-	if (dlist_next == vc4_crtc->dlist) {
-		/* If no planes were enabled, use the SCALER_CTL0_END
-		 * at the start of the display list memory (in the
-		 * bootloader section).  We'll rewrite that
-		 * SCALER_CTL0_END, just in case, though.
-		 */
-		writel(SCALER_CTL0_END, vc4->hvs->dlist);
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel), 0);
-	} else {
-		writel(SCALER_CTL0_END, dlist_next);
-		dlist_next++;
-
-		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-			  (u32 __iomem *)vc4_crtc->dlist -
-			  (u32 __iomem *)vc4->hvs->dlist);
-
-		/* Make the next display list start after ours. */
-		vc4_crtc->dlist_size -= (dlist_next - vc4_crtc->dlist);
-		vc4_crtc->dlist = dlist_next;
-	}
+	writel(SCALER_CTL0_END, dlist_next);
+	dlist_next++;
+
+	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
+
+	HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+		  vc4_state->mm.start);
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
@@ -590,6 +573,36 @@ static int vc4_page_flip(struct drm_crtc *crtc,
 		return drm_atomic_helper_page_flip(crtc, fb, event, flags);
 }
 
+static struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+	struct vc4_crtc_state *vc4_state;
+
+	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
+	if (!vc4_state)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &vc4_state->base);
+	return &vc4_state->base;
+}
+
+static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
+				   struct drm_crtc_state *state)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
+
+	if (vc4_state->mm.allocated) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
+		drm_mm_remove_node(&vc4_state->mm);
+		spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
+
+	}
+
+	__drm_atomic_helper_crtc_destroy_state(crtc, state);
+}
+
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = vc4_crtc_destroy,
@@ -598,8 +611,8 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
 	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.atomic_duplicate_state = vc4_crtc_duplicate_state,
+	.atomic_destroy_state = vc4_crtc_destroy_state,
 };
 
 static const struct drm_crtc_helper_funcs vc4_crtc_helper_funcs = {
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 51a6333..38a31c7 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -154,7 +154,13 @@ struct vc4_v3d {
 struct vc4_hvs {
 	struct platform_device *pdev;
 	void __iomem *regs;
-	void __iomem *dlist;
+	u32 __iomem *dlist;
+
+	/* Memory manager for CRTCs to allocate space in the display
+	 * list.  Units are dwords.
+	 */
+	struct drm_mm dlist_mm;
+	spinlock_t mm_lock;
 };
 
 struct vc4_plane {
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 8098c5b..9e43554 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -119,6 +119,17 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 
 	hvs->dlist = hvs->regs + SCALER_DLIST_START;
 
+	spin_lock_init(&hvs->mm_lock);
+
+	/* Set up the HVS display list memory manager.  We never
+	 * overwrite the setup from the bootloader (just 128b out of
+	 * our 16K), since we don't want to scramble the screen when
+	 * transitioning from the firmware's boot setup to runtime.
+	 */
+	drm_mm_init(&hvs->dlist_mm,
+		    HVS_BOOTLOADER_DLIST_END,
+		    (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
+
 	vc4->hvs = hvs;
 	return 0;
 }
@@ -129,6 +140,8 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 
+	drm_mm_takedown(&vc4->hvs->dlist_mm);
+
 	vc4->hvs = NULL;
 }
 
-- 
2.7.3

From 7934fe9bdbbe2ffb4bcfe656a22a8f9f4e3d266a Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 28 Dec 2015 14:45:25 -0800
Subject: [PATCH 12/36] drm/vc4: Fix which value is being used for source image
 size.

This doesn't matter yet since we only allow 1:1 scaling, but the
comment clearly says we should be using the source size.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 713ec00..d9c9290 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -47,6 +47,8 @@ struct vc4_plane_state {
 
 	/* Clipped coordinates of the plane on the display. */
 	int crtc_x, crtc_y, crtc_w, crtc_h;
+	/* Clipped size of the area scanned from in the FB. */
+	u32 src_w, src_h;
 
 	/* Offset to start scanning out from the start of the plane's
 	 * BO.
@@ -170,11 +172,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 
 	vc4_state->offset = fb->offsets[0];
 
-	vc4_state->crtc_x = state->crtc_x;
-	vc4_state->crtc_y = state->crtc_y;
-	vc4_state->crtc_w = state->crtc_w;
-	vc4_state->crtc_h = state->crtc_h;
-
 	if (state->crtc_w << 16 != state->src_w ||
 	    state->crtc_h << 16 != state->src_h) {
 		/* We don't support scaling yet, which involves
@@ -185,17 +182,25 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 		return -EINVAL;
 	}
 
+	vc4_state->src_w = state->src_w >> 16;
+	vc4_state->src_h = state->src_h >> 16;
+
+	vc4_state->crtc_x = state->crtc_x;
+	vc4_state->crtc_y = state->crtc_y;
+	vc4_state->crtc_w = state->crtc_w;
+	vc4_state->crtc_h = state->crtc_h;
+
 	if (vc4_state->crtc_x < 0) {
 		vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
 							   0) *
 				      -vc4_state->crtc_x);
-		vc4_state->crtc_w += vc4_state->crtc_x;
+		vc4_state->src_w += vc4_state->crtc_x;
 		vc4_state->crtc_x = 0;
 	}
 
 	if (vc4_state->crtc_y < 0) {
 		vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y;
-		vc4_state->crtc_h += vc4_state->crtc_y;
+		vc4_state->src_h += vc4_state->crtc_y;
 		vc4_state->crtc_y = 0;
 	}
 
@@ -244,8 +249,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
 				      SCALER_POS2_ALPHA_MODE_FIXED,
 				      SCALER_POS2_ALPHA_MODE) |
-			VC4_SET_FIELD(vc4_state->crtc_w, SCALER_POS2_WIDTH) |
-			VC4_SET_FIELD(vc4_state->crtc_h, SCALER_POS2_HEIGHT));
+			VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) |
+			VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT));
 
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
-- 
2.7.3

From e710e8e1d13c85a635c09168df6d008955ac5a4e Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 20 Oct 2015 16:06:57 +0100
Subject: [PATCH 13/36] drm/vc4: Add support for scaling of display planes.

This implements a simple policy for choosing scaling modes
(trapezoidal for decimation, PPF for magnification), and a single PPF
filter (Mitchell/Netravali's recommendation).

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_drv.h   |   4 +
 drivers/gpu/drm/vc4/vc4_hvs.c   |  84 +++++++++++++
 drivers/gpu/drm/vc4/vc4_plane.c | 253 +++++++++++++++++++++++++++++++++++++---
 drivers/gpu/drm/vc4/vc4_regs.h  |  46 ++++++++
 4 files changed, 374 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 38a31c7..83db0b7 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -160,7 +160,11 @@ struct vc4_hvs {
 	 * list.  Units are dwords.
 	 */
 	struct drm_mm dlist_mm;
+	/* Memory manager for the LBM memory used by HVS scaling. */
+	struct drm_mm lbm_mm;
 	spinlock_t mm_lock;
+
+	struct drm_mm_node mitchell_netravali_filter;
 };
 
 struct vc4_plane {
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 9e43554..6fbab1c 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -100,12 +100,76 @@ int vc4_hvs_debugfs_regs(struct seq_file *m, void *unused)
 }
 #endif
 
+/* The filter kernel is composed of dwords each containing 3 9-bit
+ * signed integers packed next to each other.
+ */
+#define VC4_INT_TO_COEFF(coeff) (coeff & 0x1ff)
+#define VC4_PPF_FILTER_WORD(c0, c1, c2)				\
+	((((c0) & 0x1ff) << 0) |				\
+	 (((c1) & 0x1ff) << 9) |				\
+	 (((c2) & 0x1ff) << 18))
+
+/* The whole filter kernel is arranged as the coefficients 0-16 going
+ * up, then a pad, then 17-31 going down and reversed within the
+ * dwords.  This means that a linear phase kernel (where it's
+ * symmetrical at the boundary between 15 and 16) has the last 5
+ * dwords matching the first 5, but reversed.
+ */
+#define VC4_LINEAR_PHASE_KERNEL(c0, c1, c2, c3, c4, c5, c6, c7, c8,	\
+				c9, c10, c11, c12, c13, c14, c15)	\
+	{VC4_PPF_FILTER_WORD(c0, c1, c2),				\
+	 VC4_PPF_FILTER_WORD(c3, c4, c5),				\
+	 VC4_PPF_FILTER_WORD(c6, c7, c8),				\
+	 VC4_PPF_FILTER_WORD(c9, c10, c11),				\
+	 VC4_PPF_FILTER_WORD(c12, c13, c14),				\
+	 VC4_PPF_FILTER_WORD(c15, c15, 0)}
+
+#define VC4_LINEAR_PHASE_KERNEL_DWORDS 6
+#define VC4_KERNEL_DWORDS (VC4_LINEAR_PHASE_KERNEL_DWORDS * 2 - 1)
+
+/* Recommended B=1/3, C=1/3 filter choice from Mitchell/Netravali.
+ * http://www.cs.utexas.edu/~fussell/courses/cs384g/lectures/mitchell/Mitchell.pdf
+ */
+static const u32 mitchell_netravali_1_3_1_3_kernel[] =
+	VC4_LINEAR_PHASE_KERNEL(0, -2, -6, -8, -10, -8, -3, 2, 18,
+				50, 82, 119, 155, 187, 213, 227);
+
+static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
+					struct drm_mm_node *space,
+					const u32 *kernel)
+{
+	int ret, i;
+	u32 __iomem *dst_kernel;
+
+	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1,
+				 0);
+	if (ret) {
+		DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
+			  ret);
+		return ret;
+	}
+
+	dst_kernel = hvs->dlist + space->start;
+
+	for (i = 0; i < VC4_KERNEL_DWORDS; i++) {
+		if (i < VC4_LINEAR_PHASE_KERNEL_DWORDS)
+			writel(kernel[i], &dst_kernel[i]);
+		else {
+			writel(kernel[VC4_KERNEL_DWORDS - i - 1],
+			       &dst_kernel[i]);
+		}
+	}
+
+	return 0;
+}
+
 static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 	struct vc4_hvs *hvs = NULL;
+	int ret;
 
 	hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
 	if (!hvs)
@@ -130,6 +194,22 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 		    HVS_BOOTLOADER_DLIST_END,
 		    (SCALER_DLIST_SIZE >> 2) - HVS_BOOTLOADER_DLIST_END);
 
+	/* Set up the HVS LBM memory manager.  We could have some more
+	 * complicated data structure that allowed reuse of LBM areas
+	 * between planes when they don't overlap on the screen, but
+	 * for now we just allocate globally.
+	 */
+	drm_mm_init(&hvs->lbm_mm, 0, 96 * 1024);
+
+	/* Upload filter kernels.  We only have the one for now, so we
+	 * keep it around for the lifetime of the driver.
+	 */
+	ret = vc4_hvs_upload_linear_kernel(hvs,
+					   &hvs->mitchell_netravali_filter,
+					   mitchell_netravali_1_3_1_3_kernel);
+	if (ret)
+		return ret;
+
 	vc4->hvs = hvs;
 	return 0;
 }
@@ -140,7 +220,11 @@ static void vc4_hvs_unbind(struct device *dev, struct device *master,
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct vc4_dev *vc4 = drm->dev_private;
 
+	if (vc4->hvs->mitchell_netravali_filter.allocated)
+		drm_mm_remove_node(&vc4->hvs->mitchell_netravali_filter);
+
 	drm_mm_takedown(&vc4->hvs->dlist_mm);
+	drm_mm_takedown(&vc4->hvs->lbm_mm);
 
 	vc4->hvs = NULL;
 }
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index d9c9290..7c2d697 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -24,6 +24,12 @@
 #include "drm_fb_cma_helper.h"
 #include "drm_plane_helper.h"
 
+enum vc4_scaling_mode {
+	VC4_SCALING_NONE,
+	VC4_SCALING_TPZ,
+	VC4_SCALING_PPF,
+};
+
 struct vc4_plane_state {
 	struct drm_plane_state base;
 	/* System memory copy of the display list for this element, computed
@@ -47,13 +53,19 @@ struct vc4_plane_state {
 
 	/* Clipped coordinates of the plane on the display. */
 	int crtc_x, crtc_y, crtc_w, crtc_h;
-	/* Clipped size of the area scanned from in the FB. */
-	u32 src_w, src_h;
+	/* Clipped area being scanned from in the FB. */
+	u32 src_x, src_y, src_w, src_h;
+
+	enum vc4_scaling_mode x_scaling, y_scaling;
+	bool is_unity;
 
 	/* Offset to start scanning out from the start of the plane's
 	 * BO.
 	 */
 	u32 offset;
+
+	/* Our allocation in LBM for temporary storage during scaling. */
+	struct drm_mm_node lbm;
 };
 
 static inline struct vc4_plane_state *
@@ -90,6 +102,16 @@ static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
 	return NULL;
 }
 
+static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
+{
+	if (dst > src)
+		return VC4_SCALING_PPF;
+	else if (dst < src)
+		return VC4_SCALING_TPZ;
+	else
+		return VC4_SCALING_NONE;
+}
+
 static bool plane_enabled(struct drm_plane_state *state)
 {
 	return state->fb && state->crtc;
@@ -106,6 +128,8 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
 	if (!vc4_state)
 		return NULL;
 
+	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
+
 	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);
 
 	if (vc4_state->dlist) {
@@ -125,8 +149,17 @@ static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane
 static void vc4_plane_destroy_state(struct drm_plane *plane,
 				    struct drm_plane_state *state)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
+	if (vc4_state->lbm.allocated) {
+		unsigned long irqflags;
+
+		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+		drm_mm_remove_node(&vc4_state->lbm);
+		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+	}
+
 	kfree(vc4_state->dlist);
 	__drm_atomic_helper_plane_destroy_state(plane, &vc4_state->base);
 	kfree(state);
@@ -165,23 +198,60 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
 	vc4_state->dlist[vc4_state->dlist_count++] = val;
 }
 
+/* Returns the scl0/scl1 field based on whether the dimensions need to
+ * be up/down/non-scaled.
+ *
+ * This is a replication of a table from the spec.
+ */
+static u32 vc4_get_scl_field(struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+	switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_PPF_V_PPF;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
+	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
+		return SCALER_CTL0_SCL_H_PPF_V_NONE;
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
+		return SCALER_CTL0_SCL_H_NONE_V_PPF;
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
+		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
+	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
+		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
+	default:
+	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
+		/* The unity case is independently handled by
+		 * SCALER_CTL0_UNITY.
+		 */
+		return 0;
+	}
+}
+
 static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 {
+	struct drm_plane *plane = state->plane;
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
+	u32 subpixel_src_mask = (1 << 16) - 1;
 
 	vc4_state->offset = fb->offsets[0];
 
-	if (state->crtc_w << 16 != state->src_w ||
-	    state->crtc_h << 16 != state->src_h) {
-		/* We don't support scaling yet, which involves
-		 * allocating the LBM memory for scaling temporary
-		 * storage, and putting filter kernels in the HVS
-		 * context.
-		 */
+	/* We don't support subpixel source positioning for scaling. */
+	if ((state->src_x & subpixel_src_mask) ||
+	    (state->src_y & subpixel_src_mask) ||
+	    (state->src_w & subpixel_src_mask) ||
+	    (state->src_h & subpixel_src_mask)) {
 		return -EINVAL;
 	}
 
+	vc4_state->src_x = state->src_x >> 16;
+	vc4_state->src_y = state->src_y >> 16;
 	vc4_state->src_w = state->src_w >> 16;
 	vc4_state->src_h = state->src_h >> 16;
 
@@ -190,6 +260,23 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	vc4_state->crtc_w = state->crtc_w;
 	vc4_state->crtc_h = state->crtc_h;
 
+	vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
+						    vc4_state->crtc_w);
+	vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
+						    vc4_state->crtc_h);
+	vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
+			       vc4_state->y_scaling == VC4_SCALING_NONE);
+
+	/* No configuring scaling on the cursor plane, since it gets
+	   non-vblank-synced updates, and scaling requires requires
+	   LBM changes which have to be vblank-synced.
+	 */
+	if (plane->type == DRM_PLANE_TYPE_CURSOR && !vc4_state->is_unity)
+		return -EINVAL;
+
+	/* Clamp the on-screen start x/y to 0.  The hardware doesn't
+	 * support negative y, and negative x wastes bandwidth.
+	 */
 	if (vc4_state->crtc_x < 0) {
 		vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
 							   0) *
@@ -207,6 +294,87 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	return 0;
 }
 
+static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+	u32 scale, recip;
+
+	scale = (1 << 16) * src / dst;
+
+	/* The specs note that while the reciprocal would be defined
+	 * as (1<<32)/scale, ~0 is close enough.
+	 */
+	recip = ~0 / scale;
+
+	vc4_dlist_write(vc4_state,
+			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
+			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
+	vc4_dlist_write(vc4_state,
+			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
+}
+
+static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
+{
+	u32 scale = (1 << 16) * src / dst;
+
+	vc4_dlist_write(vc4_state,
+			SCALER_PPF_AGC |
+			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
+			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
+}
+
+static u32 vc4_lbm_size(struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+	/* This is the worst case number.  One of the two sizes will
+	 * be used depending on the scaling configuration.
+	 */
+	u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w);
+	u32 lbm;
+
+	if (vc4_state->is_unity)
+		return 0;
+	else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
+		lbm = pix_per_line * 8;
+	else {
+		/* In special cases, this multiplier might be 12. */
+		lbm = pix_per_line * 16;
+	}
+
+	lbm = roundup(lbm, 32);
+
+	return lbm;
+}
+
+static void vc4_write_scaling_parameters(struct drm_plane_state *state)
+{
+	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
+
+	/* Ch0 H-PPF Word 0: Scaling Parameters */
+	if (vc4_state->x_scaling == VC4_SCALING_PPF) {
+		vc4_write_ppf(vc4_state,
+			      vc4_state->src_w, vc4_state->crtc_w);
+	}
+
+	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
+	if (vc4_state->y_scaling == VC4_SCALING_PPF) {
+		vc4_write_ppf(vc4_state,
+			      vc4_state->src_h, vc4_state->crtc_h);
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	}
+
+	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
+	if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
+		vc4_write_tpz(vc4_state,
+			      vc4_state->src_w, vc4_state->crtc_w);
+	}
+
+	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
+	if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
+		vc4_write_tpz(vc4_state,
+			      vc4_state->src_h, vc4_state->crtc_h);
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	}
+}
 
 /* Writes out a full display list for an active plane to the plane's
  * private dlist state.
@@ -214,22 +382,50 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 static int vc4_plane_mode_set(struct drm_plane *plane,
 			      struct drm_plane_state *state)
 {
+	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
 	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
 	u32 ctl0_offset = vc4_state->dlist_count;
 	const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
+	u32 scl;
+	u32 lbm_size;
+	unsigned long irqflags;
 	int ret;
 
 	ret = vc4_plane_setup_clipping_and_scaling(state);
 	if (ret)
 		return ret;
 
+	/* Allocate the LBM memory that the HVS will use for temporary
+	 * storage due to our scaling/format conversion.
+	 */
+	lbm_size = vc4_lbm_size(state);
+	if (lbm_size) {
+		if (!vc4_state->lbm.allocated) {
+			spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
+			ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
+						 &vc4_state->lbm,
+						 lbm_size, 32, 0);
+			spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
+		} else {
+			WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	scl = vc4_get_scl_field(state);
+
+	/* Control word */
 	vc4_dlist_write(vc4_state,
 			SCALER_CTL0_VALID |
 			(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
 			(format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
-			SCALER_CTL0_UNITY);
+			(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
+			VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
+			VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
 
 	/* Position Word 0: Image Positions and Alpha Value */
 	vc4_state->pos0_offset = vc4_state->dlist_count;
@@ -238,9 +434,14 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 			VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
 			VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));
 
-	/* Position Word 1: Scaled Image Dimensions.
-	 * Skipped due to SCALER_CTL0_UNITY scaling.
-	 */
+	/* Position Word 1: Scaled Image Dimensions. */
+	if (!vc4_state->is_unity) {
+		vc4_dlist_write(vc4_state,
+				VC4_SET_FIELD(vc4_state->crtc_w,
+					      SCALER_POS1_SCL_WIDTH) |
+				VC4_SET_FIELD(vc4_state->crtc_h,
+					      SCALER_POS1_SCL_HEIGHT));
+	}
 
 	/* Position Word 2: Source Image Size, Alpha Mode */
 	vc4_state->pos2_offset = vc4_state->dlist_count;
@@ -266,6 +467,32 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	vc4_dlist_write(vc4_state,
 			VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
 
+	if (!vc4_state->is_unity) {
+		/* LBM Base Address. */
+		if (vc4_state->y_scaling != VC4_SCALING_NONE)
+			vc4_dlist_write(vc4_state, vc4_state->lbm.start);
+
+		vc4_write_scaling_parameters(state);
+
+		/* If any PPF setup was done, then all the kernel
+		 * pointers get uploaded.
+		 */
+		if (vc4_state->x_scaling == VC4_SCALING_PPF ||
+		    vc4_state->y_scaling == VC4_SCALING_PPF) {
+			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
+						   SCALER_PPF_KERNEL_OFFSET);
+
+			/* HPPF plane 0 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* VPPF plane 0 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* HPPF plane 1 */
+			vc4_dlist_write(vc4_state, kernel);
+			/* VPPF plane 1 */
+			vc4_dlist_write(vc4_state, kernel);
+		}
+	}
+
 	vc4_state->dlist[ctl0_offset] |=
 		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);
 
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 7c29993..a5b544d 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -552,6 +552,21 @@ enum hvs_pixel_format {
 #define SCALER_CTL0_ORDER_MASK			VC4_MASK(14, 13)
 #define SCALER_CTL0_ORDER_SHIFT			13
 
+#define SCALER_CTL0_SCL1_MASK			VC4_MASK(10, 8)
+#define SCALER_CTL0_SCL1_SHIFT			8
+
+#define SCALER_CTL0_SCL0_MASK			VC4_MASK(7, 5)
+#define SCALER_CTL0_SCL0_SHIFT			5
+
+#define SCALER_CTL0_SCL_H_PPF_V_PPF		0
+#define SCALER_CTL0_SCL_H_TPZ_V_PPF		1
+#define SCALER_CTL0_SCL_H_PPF_V_TPZ		2
+#define SCALER_CTL0_SCL_H_TPZ_V_TPZ		3
+#define SCALER_CTL0_SCL_H_PPF_V_NONE		4
+#define SCALER_CTL0_SCL_H_NONE_V_PPF		5
+#define SCALER_CTL0_SCL_H_NONE_V_TPZ		6
+#define SCALER_CTL0_SCL_H_TPZ_V_NONE		7
+
 /* Set to indicate no scaling. */
 #define SCALER_CTL0_UNITY			BIT(4)
 
@@ -567,6 +582,12 @@ enum hvs_pixel_format {
 #define SCALER_POS0_START_X_MASK		VC4_MASK(11, 0)
 #define SCALER_POS0_START_X_SHIFT		0
 
+#define SCALER_POS1_SCL_HEIGHT_MASK		VC4_MASK(27, 16)
+#define SCALER_POS1_SCL_HEIGHT_SHIFT		16
+
+#define SCALER_POS1_SCL_WIDTH_MASK		VC4_MASK(11, 0)
+#define SCALER_POS1_SCL_WIDTH_SHIFT		0
+
 #define SCALER_POS2_ALPHA_MODE_MASK		VC4_MASK(31, 30)
 #define SCALER_POS2_ALPHA_MODE_SHIFT		30
 #define SCALER_POS2_ALPHA_MODE_PIPELINE		0
@@ -580,6 +601,31 @@ enum hvs_pixel_format {
 #define SCALER_POS2_WIDTH_MASK			VC4_MASK(11, 0)
 #define SCALER_POS2_WIDTH_SHIFT			0
 
+#define SCALER_TPZ0_VERT_RECALC			BIT(31)
+#define SCALER_TPZ0_SCALE_MASK			VC4_MASK(28, 8)
+#define SCALER_TPZ0_SCALE_SHIFT			8
+#define SCALER_TPZ0_IPHASE_MASK			VC4_MASK(7, 0)
+#define SCALER_TPZ0_IPHASE_SHIFT		0
+#define SCALER_TPZ1_RECIP_MASK			VC4_MASK(15, 0)
+#define SCALER_TPZ1_RECIP_SHIFT			0
+
+/* Skips interpolating coefficients to 64 phases, so just 8 are used.
+ * Required for nearest neighbor.
+ */
+#define SCALER_PPF_NOINTERP			BIT(31)
+/* Replaes the highest valued coefficient with one that makes all 4
+ * sum to unity.
+ */
+#define SCALER_PPF_AGC				BIT(30)
+#define SCALER_PPF_SCALE_MASK			VC4_MASK(24, 8)
+#define SCALER_PPF_SCALE_SHIFT			8
+#define SCALER_PPF_IPHASE_MASK			VC4_MASK(6, 0)
+#define SCALER_PPF_IPHASE_SHIFT			0
+
+#define SCALER_PPF_KERNEL_OFFSET_MASK		VC4_MASK(13, 0)
+#define SCALER_PPF_KERNEL_OFFSET_SHIFT		0
+#define SCALER_PPF_KERNEL_UNCACHED		BIT(31)
+
 #define SCALER_SRC_PITCH_MASK			VC4_MASK(15, 0)
 #define SCALER_SRC_PITCH_SHIFT			0
 
-- 
2.7.3

From c1f11c3b1a4379841341911c379237d3a3870607 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Tue, 20 Oct 2015 13:59:15 +0100
Subject: [PATCH 14/36] drm/vc4: Add support a few more RGB display plane
 formats.

These were all touch-tested with modetest.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 7c2d697..013ebff 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -88,6 +88,22 @@ static const struct hvs_format {
 		.drm = DRM_FORMAT_ARGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
 		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
 	},
+	{
+		.drm = DRM_FORMAT_RGB565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+		.pixel_order = HVS_PIXEL_ORDER_XRGB, .has_alpha = false,
+	},
+	{
+		.drm = DRM_FORMAT_BGR565, .hvs = HVS_PIXEL_FORMAT_RGB565,
+		.pixel_order = HVS_PIXEL_ORDER_XBGR, .has_alpha = false,
+	},
+	{
+		.drm = DRM_FORMAT_ARGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = true,
+	},
+	{
+		.drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
+		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
+	},
 };
 
 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
-- 
2.7.3

From 149a88adaedd0bea6c6f2f12dcf893d740be2ebb Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Wed, 30 Dec 2015 12:25:44 -0800
Subject: [PATCH 15/36] drm/vc4: Add support for YUV planes.

This supports 420 and 422 subsampling with 2 or 3 planes, tested with
modetest.  It doesn't set up chroma subsampling position (which it
appears KMS doesn't deal with yet).

The LBM memory is overallocated in many cases, but apparently the docs
aren't quite correct and I'll probably need to look at the hardware
source to really figure it out.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_plane.c | 256 +++++++++++++++++++++++++++++++---------
 drivers/gpu/drm/vc4/vc4_regs.h  |  56 ++++++++-
 2 files changed, 253 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 013ebff..7b0c72a 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -54,15 +54,19 @@ struct vc4_plane_state {
 	/* Clipped coordinates of the plane on the display. */
 	int crtc_x, crtc_y, crtc_w, crtc_h;
 	/* Clipped area being scanned from in the FB. */
-	u32 src_x, src_y, src_w, src_h;
+	u32 src_x, src_y;
 
-	enum vc4_scaling_mode x_scaling, y_scaling;
+	u32 src_w[2], src_h[2];
+
+	/* Scaling selection for the RGB/Y plane and the Cb/Cr planes. */
+	enum vc4_scaling_mode x_scaling[2], y_scaling[2];
 	bool is_unity;
+	bool is_yuv;
 
 	/* Offset to start scanning out from the start of the plane's
 	 * BO.
 	 */
-	u32 offset;
+	u32 offsets[3];
 
 	/* Our allocation in LBM for temporary storage during scaling. */
 	struct drm_mm_node lbm;
@@ -79,6 +83,7 @@ static const struct hvs_format {
 	u32 hvs; /* HVS_FORMAT_* */
 	u32 pixel_order;
 	bool has_alpha;
+	bool flip_cbcr;
 } hvs_formats[] = {
 	{
 		.drm = DRM_FORMAT_XRGB8888, .hvs = HVS_PIXEL_FORMAT_RGBA8888,
@@ -104,6 +109,32 @@ static const struct hvs_format {
 		.drm = DRM_FORMAT_XRGB1555, .hvs = HVS_PIXEL_FORMAT_RGBA5551,
 		.pixel_order = HVS_PIXEL_ORDER_ABGR, .has_alpha = false,
 	},
+	{
+		.drm = DRM_FORMAT_YUV422,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_YVU422,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
+		.flip_cbcr = true,
+	},
+	{
+		.drm = DRM_FORMAT_YUV420,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_YVU420,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
+		.flip_cbcr = true,
+	},
+	{
+		.drm = DRM_FORMAT_NV12,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
+	},
+	{
+		.drm = DRM_FORMAT_NV16,
+		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
+	},
 };
 
 static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
@@ -219,11 +250,11 @@ static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
  *
  * This is a replication of a table from the spec.
  */
-static u32 vc4_get_scl_field(struct drm_plane_state *state)
+static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
 {
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
-	switch (vc4_state->x_scaling << 2 | vc4_state->y_scaling) {
+	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
 	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
 		return SCALER_CTL0_SCL_H_PPF_V_PPF;
 	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
@@ -254,9 +285,16 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	struct drm_plane *plane = state->plane;
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
+	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
 	u32 subpixel_src_mask = (1 << 16) - 1;
+	u32 format = fb->pixel_format;
+	int num_planes = drm_format_num_planes(format);
+	u32 h_subsample = 1;
+	u32 v_subsample = 1;
+	int i;
 
-	vc4_state->offset = fb->offsets[0];
+	for (i = 0; i < num_planes; i++)
+		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];
 
 	/* We don't support subpixel source positioning for scaling. */
 	if ((state->src_x & subpixel_src_mask) ||
@@ -268,20 +306,48 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 
 	vc4_state->src_x = state->src_x >> 16;
 	vc4_state->src_y = state->src_y >> 16;
-	vc4_state->src_w = state->src_w >> 16;
-	vc4_state->src_h = state->src_h >> 16;
+	vc4_state->src_w[0] = state->src_w >> 16;
+	vc4_state->src_h[0] = state->src_h >> 16;
 
 	vc4_state->crtc_x = state->crtc_x;
 	vc4_state->crtc_y = state->crtc_y;
 	vc4_state->crtc_w = state->crtc_w;
 	vc4_state->crtc_h = state->crtc_h;
 
-	vc4_state->x_scaling = vc4_get_scaling_mode(vc4_state->src_w,
-						    vc4_state->crtc_w);
-	vc4_state->y_scaling = vc4_get_scaling_mode(vc4_state->src_h,
-						    vc4_state->crtc_h);
-	vc4_state->is_unity = (vc4_state->x_scaling == VC4_SCALING_NONE &&
-			       vc4_state->y_scaling == VC4_SCALING_NONE);
+	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
+						       vc4_state->crtc_w);
+	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
+						       vc4_state->crtc_h);
+
+	if (num_planes > 1) {
+		vc4_state->is_yuv = true;
+
+		h_subsample = drm_format_horz_chroma_subsampling(format);
+		v_subsample = drm_format_vert_chroma_subsampling(format);
+		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
+		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;
+
+		vc4_state->x_scaling[1] =
+			vc4_get_scaling_mode(vc4_state->src_w[1],
+					     vc4_state->crtc_w);
+		vc4_state->y_scaling[1] =
+			vc4_get_scaling_mode(vc4_state->src_h[1],
+					     vc4_state->crtc_h);
+
+		/* YUV conversion requires that scaling be enabled,
+		 * even on a plane that's otherwise 1:1.  Choose TPZ
+		 * for simplicity.
+		 */
+		if (vc4_state->x_scaling[0] == VC4_SCALING_NONE)
+			vc4_state->x_scaling[0] = VC4_SCALING_TPZ;
+		if (vc4_state->y_scaling[0] == VC4_SCALING_NONE)
+			vc4_state->y_scaling[0] = VC4_SCALING_TPZ;
+	}
+
+	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
+			       vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
+			       vc4_state->x_scaling[1] == VC4_SCALING_NONE &&
+			       vc4_state->y_scaling[1] == VC4_SCALING_NONE);
 
 	/* No configuring scaling on the cursor plane, since it gets
 	   non-vblank-synced updates, and scaling requires requires
@@ -294,16 +360,27 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
 	 * support negative y, and negative x wastes bandwidth.
 	 */
 	if (vc4_state->crtc_x < 0) {
-		vc4_state->offset += (drm_format_plane_cpp(fb->pixel_format,
-							   0) *
-				      -vc4_state->crtc_x);
-		vc4_state->src_w += vc4_state->crtc_x;
+		for (i = 0; i < num_planes; i++) {
+			u32 cpp = drm_format_plane_cpp(fb->pixel_format, i);
+			u32 subs = ((i == 0) ? 1 : h_subsample);
+
+			vc4_state->offsets[i] += (cpp *
+						  (-vc4_state->crtc_x) / subs);
+		}
+		vc4_state->src_w[0] += vc4_state->crtc_x;
+		vc4_state->src_w[1] += vc4_state->crtc_x / h_subsample;
 		vc4_state->crtc_x = 0;
 	}
 
 	if (vc4_state->crtc_y < 0) {
-		vc4_state->offset += fb->pitches[0] * -vc4_state->crtc_y;
-		vc4_state->src_h += vc4_state->crtc_y;
+		for (i = 0; i < num_planes; i++) {
+			u32 subs = ((i == 0) ? 1 : v_subsample);
+
+			vc4_state->offsets[i] += (fb->pitches[i] *
+						  (-vc4_state->crtc_y) / subs);
+		}
+		vc4_state->src_h[0] += vc4_state->crtc_y;
+		vc4_state->src_h[1] += vc4_state->crtc_y / v_subsample;
 		vc4_state->crtc_y = 0;
 	}
 
@@ -344,15 +421,23 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
 	/* This is the worst case number.  One of the two sizes will
 	 * be used depending on the scaling configuration.
 	 */
-	u32 pix_per_line = max(vc4_state->src_w, (u32)vc4_state->crtc_w);
+	u32 pix_per_line = max(vc4_state->src_w[0], (u32)vc4_state->crtc_w);
 	u32 lbm;
 
-	if (vc4_state->is_unity)
-		return 0;
-	else if (vc4_state->y_scaling == VC4_SCALING_TPZ)
-		lbm = pix_per_line * 8;
-	else {
-		/* In special cases, this multiplier might be 12. */
+	if (!vc4_state->is_yuv) {
+		if (vc4_state->is_unity)
+			return 0;
+		else if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ)
+			lbm = pix_per_line * 8;
+		else {
+			/* In special cases, this multiplier might be 12. */
+			lbm = pix_per_line * 16;
+		}
+	} else {
+		/* There are cases for this going down to a multiplier
+		 * of 2, but according to the firmware source, the
+		 * table in the docs is somewhat wrong.
+		 */
 		lbm = pix_per_line * 16;
 	}
 
@@ -361,33 +446,34 @@ static u32 vc4_lbm_size(struct drm_plane_state *state)
 	return lbm;
 }
 
-static void vc4_write_scaling_parameters(struct drm_plane_state *state)
+static void vc4_write_scaling_parameters(struct drm_plane_state *state,
+					 int channel)
 {
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 
 	/* Ch0 H-PPF Word 0: Scaling Parameters */
-	if (vc4_state->x_scaling == VC4_SCALING_PPF) {
+	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
 		vc4_write_ppf(vc4_state,
-			      vc4_state->src_w, vc4_state->crtc_w);
+			      vc4_state->src_w[channel], vc4_state->crtc_w);
 	}
 
 	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
-	if (vc4_state->y_scaling == VC4_SCALING_PPF) {
+	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
 		vc4_write_ppf(vc4_state,
-			      vc4_state->src_h, vc4_state->crtc_h);
+			      vc4_state->src_h[channel], vc4_state->crtc_h);
 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 	}
 
 	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
-	if (vc4_state->x_scaling == VC4_SCALING_TPZ) {
+	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
 		vc4_write_tpz(vc4_state,
-			      vc4_state->src_w, vc4_state->crtc_w);
+			      vc4_state->src_w[channel], vc4_state->crtc_w);
 	}
 
 	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
-	if (vc4_state->y_scaling == VC4_SCALING_TPZ) {
+	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
 		vc4_write_tpz(vc4_state,
-			      vc4_state->src_h, vc4_state->crtc_h);
+			      vc4_state->src_h[channel], vc4_state->crtc_h);
 		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 	}
 }
@@ -401,13 +487,13 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
 	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
 	struct drm_framebuffer *fb = state->fb;
-	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
 	u32 ctl0_offset = vc4_state->dlist_count;
 	const struct hvs_format *format = vc4_get_hvs_format(fb->pixel_format);
-	u32 scl;
+	int num_planes = drm_format_num_planes(format->drm);
+	u32 scl0, scl1;
 	u32 lbm_size;
 	unsigned long irqflags;
-	int ret;
+	int ret, i;
 
 	ret = vc4_plane_setup_clipping_and_scaling(state);
 	if (ret)
@@ -432,7 +518,19 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	if (ret)
 		return ret;
 
-	scl = vc4_get_scl_field(state);
+	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
+	 * and 4:4:4, scl1 should be set to scl0 so both channels of
+	 * the scaler do the same thing.  For YUV, the Y plane needs
+	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
+	 * the scl fields here.
+	 */
+	if (num_planes == 1) {
+		scl0 = vc4_get_scl_field(state, 1);
+		scl1 = scl0;
+	} else {
+		scl0 = vc4_get_scl_field(state, 1);
+		scl1 = vc4_get_scl_field(state, 0);
+	}
 
 	/* Control word */
 	vc4_dlist_write(vc4_state,
@@ -440,8 +538,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 			(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
 			(format->hvs << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
 			(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
-			VC4_SET_FIELD(scl, SCALER_CTL0_SCL0) |
-			VC4_SET_FIELD(scl, SCALER_CTL0_SCL1));
+			VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
+			VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));
 
 	/* Position Word 0: Image Positions and Alpha Value */
 	vc4_state->pos0_offset = vc4_state->dlist_count;
@@ -466,35 +564,68 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 				      SCALER_POS2_ALPHA_MODE_PIPELINE :
 				      SCALER_POS2_ALPHA_MODE_FIXED,
 				      SCALER_POS2_ALPHA_MODE) |
-			VC4_SET_FIELD(vc4_state->src_w, SCALER_POS2_WIDTH) |
-			VC4_SET_FIELD(vc4_state->src_h, SCALER_POS2_HEIGHT));
+			VC4_SET_FIELD(vc4_state->src_w[0], SCALER_POS2_WIDTH) |
+			VC4_SET_FIELD(vc4_state->src_h[0], SCALER_POS2_HEIGHT));
 
 	/* Position Word 3: Context.  Written by the HVS. */
 	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
-	/* Pointer Word 0: RGB / Y Pointer */
+
+	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
+	 *
+	 * The pointers may be any byte address.
+	 */
 	vc4_state->ptr0_offset = vc4_state->dlist_count;
-	vc4_dlist_write(vc4_state, bo->paddr + vc4_state->offset);
+	if (!format->flip_cbcr) {
+		for (i = 0; i < num_planes; i++)
+			vc4_dlist_write(vc4_state, vc4_state->offsets[i]);
+	} else {
+		WARN_ON_ONCE(num_planes != 3);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[0]);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[2]);
+		vc4_dlist_write(vc4_state, vc4_state->offsets[1]);
+	}
 
-	/* Pointer Context Word 0: Written by the HVS */
-	vc4_dlist_write(vc4_state, 0xc0c0c0c0);
+	/* Pointer Context Word 0/1/2: Written by the HVS */
+	for (i = 0; i < num_planes; i++)
+		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
 
-	/* Pitch word 0: Pointer 0 Pitch */
-	vc4_dlist_write(vc4_state,
-			VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH));
+	/* Pitch word 0/1/2 */
+	for (i = 0; i < num_planes; i++) {
+		vc4_dlist_write(vc4_state,
+				VC4_SET_FIELD(fb->pitches[i], SCALER_SRC_PITCH));
+	}
+
+	/* Colorspace conversion words */
+	if (vc4_state->is_yuv) {
+		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
+		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
+		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
+	}
 
 	if (!vc4_state->is_unity) {
 		/* LBM Base Address. */
-		if (vc4_state->y_scaling != VC4_SCALING_NONE)
+		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
+		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
 			vc4_dlist_write(vc4_state, vc4_state->lbm.start);
+		}
 
-		vc4_write_scaling_parameters(state);
+		if (num_planes > 1) {
+			/* Emit Cb/Cr as channel 0 and Y as channel
+			 * 1. This matches how we set up scl0/scl1
+			 * above.
+			 */
+			vc4_write_scaling_parameters(state, 1);
+		}
+		vc4_write_scaling_parameters(state, 0);
 
 		/* If any PPF setup was done, then all the kernel
 		 * pointers get uploaded.
 		 */
-		if (vc4_state->x_scaling == VC4_SCALING_PPF ||
-		    vc4_state->y_scaling == VC4_SCALING_PPF) {
+		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
+		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
+		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
+		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
 			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
 						   SCALER_PPF_KERNEL_OFFSET);
 
@@ -698,6 +829,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 	struct drm_plane *plane = NULL;
 	struct vc4_plane *vc4_plane;
 	u32 formats[ARRAY_SIZE(hvs_formats)];
+	u32 num_formats = 0;
 	int ret = 0;
 	unsigned i;
 
@@ -708,12 +840,20 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 		goto fail;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
-		formats[i] = hvs_formats[i].drm;
+	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
+		/* Don't allow YUV in cursor planes, since that means
+		 * tuning on the scaler, which we don't allow for the
+		 * cursor.
+		 */
+		if (type != DRM_PLANE_TYPE_CURSOR ||
+		    hvs_formats[i].hvs < HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE) {
+			formats[num_formats++] = hvs_formats[i].drm;
+		}
+	}
 	plane = &vc4_plane->base;
 	ret = drm_universal_plane_init(dev, plane, 0xff,
 				       &vc4_plane_funcs,
-				       formats, ARRAY_SIZE(formats),
+				       formats, num_formats,
 				       type, NULL);
 
 	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index a5b544d..bf42a8e 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -519,7 +519,12 @@ enum hvs_pixel_format {
 	HVS_PIXEL_FORMAT_RGB888 = 5,
 	HVS_PIXEL_FORMAT_RGBA6666 = 6,
 	/* 32bpp */
-	HVS_PIXEL_FORMAT_RGBA8888 = 7
+	HVS_PIXEL_FORMAT_RGBA8888 = 7,
+
+	HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE = 8,
+	HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE = 9,
+	HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE = 10,
+	HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE = 11,
 };
 
 /* Note: the LSB is the rightmost character shown.  Only valid for
@@ -601,6 +606,55 @@ enum hvs_pixel_format {
 #define SCALER_POS2_WIDTH_MASK			VC4_MASK(11, 0)
 #define SCALER_POS2_WIDTH_SHIFT			0
 
+/* Color Space Conversion words.  Some values are S2.8 signed
+ * integers, except that the 2 integer bits map as {0x0: 0, 0x1: 1,
+ * 0x2: 2, 0x3: -1}
+ */
+/* bottom 8 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC0_COEF_CR_BLU_MASK		VC4_MASK(31, 24)
+#define SCALER_CSC0_COEF_CR_BLU_SHIFT		24
+/* Signed offset to apply to Y before CSC. (Y' = Y + YY_OFS) */
+#define SCALER_CSC0_COEF_YY_OFS_MASK		VC4_MASK(23, 16)
+#define SCALER_CSC0_COEF_YY_OFS_SHIFT		16
+/* Signed offset to apply to CB before CSC (Cb' = Cb - 128 + CB_OFS). */
+#define SCALER_CSC0_COEF_CB_OFS_MASK		VC4_MASK(15, 8)
+#define SCALER_CSC0_COEF_CB_OFS_SHIFT		8
+/* Signed offset to apply to CB before CSC (Cr' = Cr - 128 + CR_OFS). */
+#define SCALER_CSC0_COEF_CR_OFS_MASK		VC4_MASK(7, 0)
+#define SCALER_CSC0_COEF_CR_OFS_SHIFT		0
+#define SCALER_CSC0_ITR_R_601_5			0x00f00000
+#define SCALER_CSC0_ITR_R_709_3			0x00f00000
+#define SCALER_CSC0_JPEG_JFIF			0x00000000
+
+/* S2.8 contribution of Cb to Green */
+#define SCALER_CSC1_COEF_CB_GRN_MASK		VC4_MASK(31, 22)
+#define SCALER_CSC1_COEF_CB_GRN_SHIFT		22
+/* S2.8 contribution of Cr to Green */
+#define SCALER_CSC1_COEF_CR_GRN_MASK		VC4_MASK(21, 12)
+#define SCALER_CSC1_COEF_CR_GRN_SHIFT		12
+/* S2.8 contribution of Y to all of RGB */
+#define SCALER_CSC1_COEF_YY_ALL_MASK		VC4_MASK(11, 2)
+#define SCALER_CSC1_COEF_YY_ALL_SHIFT		2
+/* top 2 bits of S2.8 contribution of Cr to Blue */
+#define SCALER_CSC1_COEF_CR_BLU_MASK		VC4_MASK(1, 0)
+#define SCALER_CSC1_COEF_CR_BLU_SHIFT		0
+#define SCALER_CSC1_ITR_R_601_5			0xe73304a8
+#define SCALER_CSC1_ITR_R_709_3			0xf2b784a8
+#define SCALER_CSC1_JPEG_JFIF			0xea34a400
+
+/* S2.8 contribution of Cb to Red */
+#define SCALER_CSC2_COEF_CB_RED_MASK		VC4_MASK(29, 20)
+#define SCALER_CSC2_COEF_CB_RED_SHIFT		20
+/* S2.8 contribution of Cr to Red */
+#define SCALER_CSC2_COEF_CR_RED_MASK		VC4_MASK(19, 10)
+#define SCALER_CSC2_COEF_CR_RED_SHIFT		10
+/* S2.8 contribution of Cb to Blue */
+#define SCALER_CSC2_COEF_CB_BLU_MASK		VC4_MASK(19, 10)
+#define SCALER_CSC2_COEF_CB_BLU_SHIFT		10
+#define SCALER_CSC2_ITR_R_601_5			0x00066204
+#define SCALER_CSC2_ITR_R_709_3			0x00072a1c
+#define SCALER_CSC2_JPEG_JFIF			0x000599c5
+
 #define SCALER_TPZ0_VERT_RECALC			BIT(31)
 #define SCALER_TPZ0_SCALE_MASK			VC4_MASK(28, 8)
 #define SCALER_TPZ0_SCALE_SHIFT			8
-- 
2.7.3

From 47ed1ee3dbfde89297b81bc09f1b483f4da1b06d Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 29 Feb 2016 17:53:00 -0800
Subject: [PATCH 16/36] drm/vc4: Let gpiolib know that we're OK with sleeping
 for HPD.

Fixes an error thrown every few seconds when we poll HPD when it's on
a I2C to GPIO expander.

Signed-off-by: Eric Anholt <eric@anholt.net>
Tested-by: Daniel Stone <daniels@collabora.com>
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 56272ca..6bcf51d 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -166,7 +166,7 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
 	if (vc4->hdmi->hpd_gpio) {
-		if (gpio_get_value(vc4->hdmi->hpd_gpio))
+		if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio))
 			return connector_status_connected;
 		else
 			return connector_status_disconnected;
-- 
2.7.3

From 24b28acef42486c282bc58e977cbfc66191a8f38 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 29 Feb 2016 17:53:01 -0800
Subject: [PATCH 17/36] drm/vc4: Respect GPIO_ACTIVE_LOW on HDMI HPD if set in
 the devicetree.

The original Raspberry Pi had the GPIO active high, but the later
models are active low.  The DT GPIO bindings allow specifying the
active flag, except that it doesn't get propagated to the gpiodesc, so
you have to handle it yourself.

Signed-off-by: Eric Anholt <eric@anholt.net>
Tested-by: Daniel Stone <daniels@collabora.com>
---
 drivers/gpu/drm/vc4/vc4_hdmi.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 6bcf51d..d8b8649 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -47,6 +47,7 @@ struct vc4_hdmi {
 	void __iomem *hdmicore_regs;
 	void __iomem *hd_regs;
 	int hpd_gpio;
+	bool hpd_active_low;
 
 	struct clk *pixel_clock;
 	struct clk *hsm_clock;
@@ -166,7 +167,8 @@ vc4_hdmi_connector_detect(struct drm_connector *connector, bool force)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
 	if (vc4->hdmi->hpd_gpio) {
-		if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio))
+		if (gpio_get_value_cansleep(vc4->hdmi->hpd_gpio) ^
+		    vc4->hdmi->hpd_active_low)
 			return connector_status_connected;
 		else
 			return connector_status_disconnected;
@@ -517,11 +519,17 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data)
 	 * we'll use the HDMI core's register.
 	 */
 	if (of_find_property(dev->of_node, "hpd-gpios", &value)) {
-		hdmi->hpd_gpio = of_get_named_gpio(dev->of_node, "hpd-gpios", 0);
+		enum of_gpio_flags hpd_gpio_flags;
+
+		hdmi->hpd_gpio = of_get_named_gpio_flags(dev->of_node,
+							 "hpd-gpios", 0,
+							 &hpd_gpio_flags);
 		if (hdmi->hpd_gpio < 0) {
 			ret = hdmi->hpd_gpio;
 			goto err_unprepare_hsm;
 		}
+
+		hdmi->hpd_active_low = hpd_gpio_flags & OF_GPIO_ACTIVE_LOW;
 	}
 
 	vc4->hdmi = hdmi;
-- 
2.7.3

From d258332b618af0136386d4fb8f738caedae8c71d Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 8 Mar 2016 15:09:41 +0300
Subject: [PATCH 18/36] drm/vc4: Return -EFAULT on copy_from_user() failure

The copy_from_user() function returns the number of bytes not copied but
we want to return a negative error code.

Fixes: 463873d57014 ('drm/vc4: Add an API for creating GPU shaders in GEM BOs.')
Cc: stable@vger.kernel.org
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 drivers/gpu/drm/vc4/vc4_bo.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/vc4/vc4_bo.c b/drivers/gpu/drm/vc4/vc4_bo.c
index 22278bc..ac8eafe 100644
--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -499,11 +499,12 @@ vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
-	ret = copy_from_user(bo->base.vaddr,
+	if (copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
-			     args->size);
-	if (ret != 0)
+			     args->size)) {
+		ret = -EFAULT;
 		goto fail;
+	}
 	/* Clear the rest of the memory from allocating from the BO
 	 * cache.
 	 */
-- 
2.7.3

From ed6836e411dd559a811dd063509a01772f4fe00f Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 4 Mar 2016 12:32:07 -0800
Subject: [PATCH 19/36] drm/vc4: Recognize a more specific compatible string
 for V3D.

The Raspberry Pi Foundation's firmware updates are shipping device
trees using the old string, so we'll keep recognizing that as this rev
of V3D.  Still, we should use a more specific name in the upstream DT
to clarify which board is being supported, in case we do other revs of
V3D in the future.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
---
 drivers/gpu/drm/vc4/vc4_v3d.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c
index 31de5d1..e6d3c60 100644
--- a/drivers/gpu/drm/vc4/vc4_v3d.c
+++ b/drivers/gpu/drm/vc4/vc4_v3d.c
@@ -268,6 +268,7 @@ static int vc4_v3d_dev_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id vc4_v3d_dt_match[] = {
+	{ .compatible = "brcm,bcm2835-v3d" },
 	{ .compatible = "brcm,vc4-v3d" },
 	{}
 };
-- 
2.7.3

From 142ad4bd29558b85f269a02b74c5149b514acf88 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 15 Feb 2016 19:03:57 -0800
Subject: [PATCH 20/36] clk: bcm2835: Fix setting of PLL divider clock rates

Our dividers weren't being set successfully because CM_PASSWORD wasn't
included in the register write.  It looks easier to just compute the
divider to write ourselves than to update clk-divider for the ability
to OR in some arbitrary bits on write.

Fixes about half of the video modes on my HDMI monitor (everything
except 720x400).

Cc: stable@vger.kernel.org
Signed-off-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
---
 drivers/clk/bcm/clk-bcm2835.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index 015e687..9f4df8f 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -1107,13 +1107,15 @@ static int bcm2835_pll_divider_set_rate(struct clk_hw *hw,
 	struct bcm2835_pll_divider *divider = bcm2835_pll_divider_from_hw(hw);
 	struct bcm2835_cprman *cprman = divider->cprman;
 	const struct bcm2835_pll_divider_data *data = divider->data;
-	u32 cm;
-	int ret;
+	u32 cm, div, max_div = 1 << A2W_PLL_DIV_BITS;
 
-	ret = clk_divider_ops.set_rate(hw, rate, parent_rate);
-	if (ret)
-		return ret;
+	div = DIV_ROUND_UP_ULL(parent_rate, rate);
+
+	div = min(div, max_div);
+	if (div == max_div)
+		div = 0;
 
+	cprman_write(cprman, data->a2w_reg, div);
 	cm = cprman_read(cprman, data->cm_reg);
 	cprman_write(cprman, data->cm_reg, cm | data->load_mask);
 	cprman_write(cprman, data->cm_reg, cm & ~data->load_mask);
-- 
2.7.3

From 55acd7db60c8247d926969b705373765c26c1f44 Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Fri, 11 Sep 2015 11:22:05 +0000
Subject: [PATCH 21/36] ARM: bcm2835: add the auxiliary spi1 and spi2 to the
 device tree

This enables the use of the auxiliary spi1 and spi2 devices
on the bcm2835 SOC.

Note that this requires the use of the new clk-bcm2835-aux to work.

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
[anholt: Rebased on 2835.dtsi -> 283x.dtsi change]
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x.dtsi | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 971e741..f0d4573 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -1,5 +1,6 @@
 #include <dt-bindings/pinctrl/bcm2835.h>
 #include <dt-bindings/clock/bcm2835.h>
+#include <dt-bindings/clock/bcm2835-aux.h>
 #include "skeleton.dtsi"
 
 /* This include file covers the common peripherals and configuration between
@@ -159,6 +160,26 @@
 			clocks = <&clocks BCM2835_CLOCK_VPU>;
 		};
 
+		spi1: spi@7e215080 {
+			compatible = "brcm,bcm2835-aux-spi";
+			reg = <0x7e215080 0x40>;
+			interrupts = <1 29>;
+			clocks = <&aux BCM2835_AUX_CLOCK_SPI1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
+		spi2: spi@7e2150c0 {
+			compatible = "brcm,bcm2835-aux-spi";
+			reg = <0x7e2150c0 0x40>;
+			interrupts = <1 29>;
+			clocks = <&aux BCM2835_AUX_CLOCK_SPI2>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+			status = "disabled";
+		};
+
 		sdhci: sdhci@7e300000 {
 			compatible = "brcm,bcm2835-sdhci";
 			reg = <0x7e300000 0x100>;
-- 
2.7.3

From 3e0a385cebe3b0d9338cab356c6a11daaa64f808 Mon Sep 17 00:00:00 2001
From: Remi Pommarel <repk@triplefau.lt>
Date: Mon, 21 Dec 2015 21:12:59 +0100
Subject: [PATCH 22/36] ARM: bcm2835: Add PWM clock support to the device tree

Signed-off-by: Remi Pommarel <repk@triplefau.lt>
[anholt: Rebased on 2835.dtsi -> 283x.dtsi change]
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm2835-rpi.dtsi |  4 ++++
 arch/arm/boot/dts/bcm283x.dtsi     | 10 ++++++++++
 2 files changed, 14 insertions(+)

diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index 3afb9fe..a584a93 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -58,3 +58,7 @@
 	status = "okay";
 	bus-width = <4>;
 };
+
+&pwm {
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index f0d4573..e4a2792 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -180,6 +180,16 @@
 			status = "disabled";
 		};
 
+		pwm: pwm@7e20c000 {
+			compatible = "brcm,bcm2835-pwm";
+			reg = <0x7e20c000 0x28>;
+			clocks = <&clocks BCM2835_CLOCK_PWM>;
+			assigned-clocks = <&clocks BCM2835_CLOCK_PWM>;
+			assigned-clock-rates = <10000000>;
+			#pwm-cells = <2>;
+			status = "disabled";
+		};
+
 		sdhci: sdhci@7e300000 {
 			compatible = "brcm,bcm2835-sdhci";
 			reg = <0x7e300000 0x100>;
-- 
2.7.3

From 84416a8360e7c31e6ba9f7775c077bd5f3fe32de Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Mon, 25 Jan 2016 21:40:06 +0100
Subject: [PATCH 23/36] ARM: bcm2835: dt: Add Raspberry Pi Model A

This one is essentially the same as revision 2 B board (with the I2S on
P5 header).

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
[anholt: Rebased on bcm2835.dtsi -> bcm283x.dtsi change]
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/Makefile          |  1 +
 arch/arm/boot/dts/bcm2835-rpi-a.dts | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)
 create mode 100644 arch/arm/boot/dts/bcm2835-rpi-a.dts

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index a4a6d70..d000814 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -60,6 +60,7 @@ dtb-$(CONFIG_ARCH_AXXIA) += \
 	axm5516-amarillo.dtb
 dtb-$(CONFIG_ARCH_BCM2835) += \
 	bcm2835-rpi-b.dtb \
+	bcm2835-rpi-a.dtb \
 	bcm2835-rpi-b-rev2.dtb \
 	bcm2835-rpi-b-plus.dtb \
 	bcm2835-rpi-a-plus.dtb \
diff --git a/arch/arm/boot/dts/bcm2835-rpi-a.dts b/arch/arm/boot/dts/bcm2835-rpi-a.dts
new file mode 100644
index 0000000..ddbbbbd
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2835-rpi-a.dts
@@ -0,0 +1,24 @@
+/dts-v1/;
+#include "bcm2835.dtsi"
+#include "bcm2835-rpi.dtsi"
+
+/ {
+	compatible = "raspberrypi,model-a", "brcm,bcm2835";
+	model = "Raspberry Pi Model A";
+
+	leds {
+		act {
+			gpios = <&gpio 16 1>;
+		};
+	};
+};
+
+&gpio {
+	pinctrl-0 = <&gpioout &alt0 &i2s_alt2 &alt3>;
+
+	/* I2S interface */
+	i2s_alt2: i2s_alt2 {
+		brcm,pins = <28 29 30 31>;
+		brcm,function = <BCM2835_FSEL_ALT2>;
+	};
+};
-- 
2.7.3

From f6fd06c97f0d6d8398b8caba1b6879fa7e0284ba Mon Sep 17 00:00:00 2001
From: Alexander Aring <alex.aring@gmail.com>
Date: Wed, 16 Dec 2015 16:26:49 -0800
Subject: [PATCH 24/36] ARM: bcm2835: Add the Raspberry Pi power domain driver
 to the DT.

This connects the USB driver to the USB power domain, so that USB can
actually be turned on at boot if the bootloader didn't do it for us.

Signed-off-by: Alexander Aring <alex.aring@gmail.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Kevin Hilman <khilman@linaro.org>
---
 arch/arm/boot/dts/bcm2835-rpi.dtsi | 12 ++++++++++++
 arch/arm/boot/dts/bcm283x.dtsi     |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index a584a93..76bdbca 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -1,3 +1,5 @@
+#include <dt-bindings/power/raspberrypi-power.h>
+
 / {
 	memory {
 		reg = <0 0x10000000>;
@@ -18,6 +20,12 @@
 			compatible = "raspberrypi,bcm2835-firmware";
 			mboxes = <&mailbox>;
 		};
+
+		power: power {
+			compatible = "raspberrypi,bcm2835-power";
+			firmware = <&firmware>;
+			#power-domain-cells = <1>;
+		};
 	};
 };
 
@@ -62,3 +70,7 @@
 &pwm {
 	status = "okay";
 };
+
+&usb {
+	power-domains = <&power RPI_POWER_DOMAIN_USB>;
+};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index e4a2792..e69a6cf 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -218,7 +218,7 @@
 			status = "disabled";
 		};
 
-		usb@7e980000 {
+		usb: usb@7e980000 {
 			compatible = "brcm,bcm2835-usb";
 			reg = <0x7e980000 0x10000>;
 			interrupts = <1 9>;
-- 
2.7.3

From 580146c680ac7b706ac54d7d7db8204bee3d2e93 Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Fri, 12 Feb 2016 11:14:25 +0000
Subject: [PATCH 25/36] ARM: bcm2835: add bcm2835-aux-uart support to DT

Add bcm2835-aux-uart support to the device tree.

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>
Signed-off-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm283x.dtsi | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index e69a6cf..fc67964 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -160,6 +160,14 @@
 			clocks = <&clocks BCM2835_CLOCK_VPU>;
 		};
 
+		uart1: serial@7e215040 {
+			compatible = "brcm,bcm2835-aux-uart";
+			reg = <0x7e215040 0x40>;
+			interrupts = <1 29>;
+			clocks = <&aux BCM2835_AUX_CLOCK_UART>;
+			status = "disabled";
+		};
+
 		spi1: spi@7e215080 {
 			compatible = "brcm,bcm2835-aux-spi";
 			reg = <0x7e215080 0x40>;
-- 
2.7.3

From 41135d2ce60509e53306e5b76afab98ddc15951b Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Mon, 2 Mar 2015 14:36:16 -0800
Subject: [PATCH 26/36] ARM: bcm2835: Add VC4 to the device tree.

VC4 is the GPU (display and 3D) present on the 283x.

v2: Sort by register address, mark HDMI as disabled by default in the
    SoC file and enable it from -rpi.
v3: Add references to the pixel/HSM clocks for HDMI.  Rename
    compatibility strings and clean up node names.
v4: Fix comment marking pv0's interrupt as pwa2 instead of pwa0.
    Rename hpd-gpio to hpd-gpios.
v5: Rebase on bcm283x.dtsi change, add v3d.
v6: Make HDMI reference the power domain.
v7: Fix the HDMI HPD gpios active value and HDMI enable for each RPI
    board.  Change V3D compatible string to 2835.

Signed-off-by: Eric Anholt <eric@anholt.net>
---
 arch/arm/boot/dts/bcm2835-rpi-a-plus.dts |  4 +++
 arch/arm/boot/dts/bcm2835-rpi-a.dts      |  4 +++
 arch/arm/boot/dts/bcm2835-rpi-b-plus.dts |  4 +++
 arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts |  4 +++
 arch/arm/boot/dts/bcm2835-rpi-b.dts      |  4 +++
 arch/arm/boot/dts/bcm2835-rpi.dtsi       |  9 ++++++
 arch/arm/boot/dts/bcm2836-rpi-2-b.dts    |  4 +++
 arch/arm/boot/dts/bcm283x.dtsi           | 47 ++++++++++++++++++++++++++++++++
 8 files changed, 80 insertions(+)

diff --git a/arch/arm/boot/dts/bcm2835-rpi-a-plus.dts b/arch/arm/boot/dts/bcm2835-rpi-a-plus.dts
index 228614f..35ff4e7a 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-a-plus.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-a-plus.dts
@@ -29,3 +29,7 @@
 		brcm,function = <BCM2835_FSEL_ALT0>;
 	};
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi-a.dts b/arch/arm/boot/dts/bcm2835-rpi-a.dts
index ddbbbbd..306a84e 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-a.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-a.dts
@@ -22,3 +22,7 @@
 		brcm,function = <BCM2835_FSEL_ALT2>;
 	};
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi-b-plus.dts b/arch/arm/boot/dts/bcm2835-rpi-b-plus.dts
index ef54050..57d313b 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-b-plus.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-b-plus.dts
@@ -29,3 +29,7 @@
 		brcm,function = <BCM2835_FSEL_ALT0>;
 	};
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
index 86f1f2f..cf2774e 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts
@@ -22,3 +22,7 @@
 		brcm,function = <BCM2835_FSEL_ALT2>;
 	};
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi-b.dts b/arch/arm/boot/dts/bcm2835-rpi-b.dts
index 4859e9d..8b15f9c 100644
--- a/arch/arm/boot/dts/bcm2835-rpi-b.dts
+++ b/arch/arm/boot/dts/bcm2835-rpi-b.dts
@@ -16,3 +16,7 @@
 &gpio {
 	pinctrl-0 = <&gpioout &alt0 &alt3>;
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>;
+};
diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index 76bdbca..caf2707 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -74,3 +74,12 @@
 &usb {
 	power-domains = <&power RPI_POWER_DOMAIN_USB>;
 };
+
+&v3d {
+	power-domains = <&power RPI_POWER_DOMAIN_V3D>;
+};
+
+&hdmi {
+	power-domains = <&power RPI_POWER_DOMAIN_HDMI>;
+	status = "okay";
+};
diff --git a/arch/arm/boot/dts/bcm2836-rpi-2-b.dts b/arch/arm/boot/dts/bcm2836-rpi-2-b.dts
index ff94666..c4743f4 100644
--- a/arch/arm/boot/dts/bcm2836-rpi-2-b.dts
+++ b/arch/arm/boot/dts/bcm2836-rpi-2-b.dts
@@ -33,3 +33,7 @@
 		brcm,function = <BCM2835_FSEL_ALT0>;
 	};
 };
+
+&hdmi {
+	hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>;
+};
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index fc67964..bbe4eab 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -1,6 +1,7 @@
 #include <dt-bindings/pinctrl/bcm2835.h>
 #include <dt-bindings/clock/bcm2835.h>
 #include <dt-bindings/clock/bcm2835-aux.h>
+#include <dt-bindings/gpio/gpio.h>
 #include "skeleton.dtsi"
 
 /* This include file covers the common peripherals and configuration between
@@ -153,6 +154,18 @@
 			status = "disabled";
 		};
 
+		pixelvalve@7e206000 {
+			compatible = "brcm,bcm2835-pixelvalve0";
+			reg = <0x7e206000 0x100>;
+			interrupts = <2 13>; /* pwa0 */
+		};
+
+		pixelvalve@7e207000 {
+			compatible = "brcm,bcm2835-pixelvalve1";
+			reg = <0x7e207000 0x100>;
+			interrupts = <2 14>; /* pwa1 */
+		};
+
 		aux: aux@0x7e215000 {
 			compatible = "brcm,bcm2835-aux";
 			#clock-cells = <1>;
@@ -206,6 +219,12 @@
 			status = "disabled";
 		};
 
+		hvs@7e400000 {
+			compatible = "brcm,bcm2835-hvs";
+			reg = <0x7e400000 0x6000>;
+			interrupts = <2 1>;
+		};
+
 		i2c1: i2c@7e804000 {
 			compatible = "brcm,bcm2835-i2c";
 			reg = <0x7e804000 0x1000>;
@@ -226,11 +245,39 @@
 			status = "disabled";
 		};
 
+		pixelvalve@7e807000 {
+			compatible = "brcm,bcm2835-pixelvalve2";
+			reg = <0x7e807000 0x100>;
+			interrupts = <2 10>; /* pixelvalve */
+		};
+
+		hdmi: hdmi@7e902000 {
+			compatible = "brcm,bcm2835-hdmi";
+			reg = <0x7e902000 0x600>,
+			      <0x7e808000 0x100>;
+			interrupts = <2 8>, <2 9>;
+			ddc = <&i2c2>;
+			clocks = <&clocks BCM2835_PLLH_PIX>,
+				 <&clocks BCM2835_CLOCK_HSM>;
+			clock-names = "pixel", "hdmi";
+			status = "disabled";
+		};
+
 		usb: usb@7e980000 {
 			compatible = "brcm,bcm2835-usb";
 			reg = <0x7e980000 0x10000>;
 			interrupts = <1 9>;
 		};
+
+		v3d: v3d@7ec00000 {
+			compatible = "brcm,bcm2835-v3d";
+			reg = <0x7ec00000 0x1000>;
+			interrupts = <1 10>;
+		};
+
+		vc4: gpu {
+			compatible = "brcm,bcm2835-vc4";
+		};
 	};
 
 	clocks {
-- 
2.7.3

From da77f737f9f5a487f3a1f80f8546585ee18cd7b9 Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 4 Mar 2016 10:39:28 -0800
Subject: [PATCH 27/36] dt-bindings: Add root properties for Raspberry Pi 3

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
index 11d3056..6ffe087 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm2835.txt
@@ -30,6 +30,10 @@ Raspberry Pi 2 Model B
 Required root node properties:
 compatible = "raspberrypi,2-model-b", "brcm,bcm2836";
 
+Raspberry Pi 3 Model B
+Required root node properties:
+compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
+
 Raspberry Pi Compute Module
 Required root node properties:
 compatible = "raspberrypi,compute-module", "brcm,bcm2835";
-- 
2.7.3

From b76b1cdf2e569cceab41dcf3b3f6a90965d0a02c Mon Sep 17 00:00:00 2001
From: Eric Anholt <eric@anholt.net>
Date: Fri, 4 Mar 2016 10:39:29 -0800
Subject: [PATCH 28/36] ARM: bcm2835: Add devicetree for the Raspberry Pi 3.

For now this doesn't support the new hardware present on the Pi 3 (BT,
wifi, GPIO expander).  Since the GPIO expander isn't supported, we
also don't have the LEDs like the other board files do.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
---
 arch/arm/boot/dts/Makefile            |  3 +-
 arch/arm/boot/dts/bcm2837-rpi-3-b.dts | 22 ++++++++++++
 arch/arm/boot/dts/bcm2837.dtsi        | 68 +++++++++++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 arch/arm/boot/dts/bcm2837-rpi-3-b.dts
 create mode 100644 arch/arm/boot/dts/bcm2837.dtsi

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index d000814..a8a0767 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile
@@ -64,7 +64,8 @@ dtb-$(CONFIG_ARCH_BCM2835) += \
 	bcm2835-rpi-b-rev2.dtb \
 	bcm2835-rpi-b-plus.dtb \
 	bcm2835-rpi-a-plus.dtb \
-	bcm2836-rpi-2-b.dtb
+	bcm2836-rpi-2-b.dtb \
+	bcm2837-rpi-3-b.dtb
 dtb-$(CONFIG_ARCH_BCM_5301X) += \
 	bcm4708-asus-rt-ac56u.dtb \
 	bcm4708-asus-rt-ac68u.dtb \
diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
new file mode 100644
index 0000000..5e8eafd
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts
@@ -0,0 +1,22 @@
+/dts-v1/;
+#include "bcm2837.dtsi"
+#include "bcm2835-rpi.dtsi"
+
+/ {
+	compatible = "raspberrypi,3-model-b", "brcm,bcm2837";
+	model = "Raspberry Pi 3 Model B";
+
+	memory {
+		reg = <0 0x40000000>;
+	};
+};
+
+&gpio {
+	pinctrl-0 = <&gpioout &alt0 &i2s_alt0 &alt3>;
+
+	/* I2S interface */
+	i2s_alt0: i2s_alt0 {
+		brcm,pins = <28 29 30 31>;
+		brcm,function = <BCM2835_FSEL_ALT2>;
+	};
+};
diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi
new file mode 100644
index 0000000..2f36722
--- /dev/null
+++ b/arch/arm/boot/dts/bcm2837.dtsi
@@ -0,0 +1,68 @@
+#include "bcm283x.dtsi"
+
+/ {
+	compatible = "brcm,bcm2836";
+
+	soc {
+		ranges = <0x7e000000 0x3f000000 0x1000000>,
+			 <0x40000000 0x40000000 0x00001000>;
+		dma-ranges = <0xc0000000 0x00000000 0x3f000000>;
+
+		local_intc: local_intc {
+			compatible = "brcm,bcm2836-l1-intc";
+			reg = <0x40000000 0x100>;
+			interrupt-controller;
+			#interrupt-cells = <1>;
+			interrupt-parent = <&local_intc>;
+		};
+	};
+
+	timer {
+		compatible = "arm,armv7-timer";
+		interrupt-parent = <&local_intc>;
+		interrupts = <0>, // PHYS_SECURE_PPI
+			     <1>, // PHYS_NONSECURE_PPI
+			     <3>, // VIRT_PPI
+			     <2>; // HYP_PPI
+		always-on;
+	};
+
+	cpus: cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		cpu0: cpu@0 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <0>;
+		};
+
+		cpu1: cpu@1 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <1>;
+		};
+
+		cpu2: cpu@2 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <2>;
+		};
+
+		cpu3: cpu@3 {
+			device_type = "cpu";
+			compatible = "arm,cortex-a53";
+			reg = <3>;
+		};
+	};
+};
+
+/* Make the BCM2835-style global interrupt controller be a child of the
+ * CPU-local interrupt controller.
+ */
+&intc {
+	compatible = "brcm,bcm2836-armctrl-ic";
+	reg = <0x7e00b200 0x200>;
+	interrupt-parent = <&local_intc>;
+	interrupts = <8>;
+};
-- 
2.7.3

From 43aa67b7bccfb189a3e57832f08710c98fe707c6 Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Sun, 17 Jan 2016 12:15:28 +0000
Subject: [PATCH 29/36] ARM: bcm2835: follow dt uart node-naming convention

This patch fixes the naming of the device tree node: uart@7e201000
to conform to the standard of: serial@7e201000

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>
---
 arch/arm/boot/dts/bcm283x.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index bbe4eab..31cc2f2 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -113,7 +113,7 @@
 			#interrupt-cells = <2>;
 		};
 
-		uart0: uart@7e201000 {
+		uart0: serial@7e201000 {
 			compatible = "brcm,bcm2835-pl011", "arm,pl011", "arm,primecell";
 			reg = <0x7e201000 0x1000>;
 			interrupts = <2 25>;
-- 
2.7.3

From 72b53a14be5ff0bda535faefa09bc9726acbe1ff Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Sun, 17 Jan 2016 12:15:29 +0000
Subject: [PATCH 30/36] dt/bindings: serial: bcm2835: add binding documentation
 for bcm2835-aux-uart

Add binding documentation for the bcm2835-aux-uart driver.

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>

Changelog:
	V2->V3: fixed naming convention for node
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Eric Anholt <eric@anholt.net>
---
 .../bindings/serial/brcm,bcm2835-aux-uart.txt          | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt

diff --git a/Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt b/Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt
new file mode 100644
index 0000000..b5cc629
--- /dev/null
+++ b/Documentation/devicetree/bindings/serial/brcm,bcm2835-aux-uart.txt
@@ -0,0 +1,18 @@
+* BCM2835 AUXILIAR UART
+
+Required properties:
+
+- compatible: "brcm,bcm2835-aux-uart"
+- reg: The base address of the UART register bank.
+- interrupts: A single interrupt specifier.
+- clocks: Clock driving the hardware; used to figure out the baud rate
+  divisor.
+
+Example:
+
+	uart1: serial@7e215040 {
+		compatible = "brcm,bcm2835-aux-uart";
+		reg = <0x7e215040 0x40>;
+		interrupts = <1 29>;
+		clocks = <&aux BCM2835_AUX_CLOCK_UART>;
+	};
-- 
2.7.3

From 285a4ac466d3712b50f5c0d29bf5874476f00c30 Mon Sep 17 00:00:00 2001
From: Martin Sperl <kernel@martin.sperl.org>
Date: Sun, 17 Jan 2016 12:15:30 +0000
Subject: [PATCH 31/36] serial: bcm2835: add driver for bcm2835-aux-uart

The bcm2835 SOC contains an auxiliary uart, which is very close
to the ns16550 with some differences.

The big difference is that the uart HW is not using an internal divider
of 16 but 8, which results in an effictive baud-rate being twice
the requested baud-rate.

This driver handles this device correctly and handles the difference in
the HW divider by scaling up the clock by a factor of 2.

The approach to write a separate (wrapper) driver instead of using a
multiplying clock and "ns16550" as compatibility in the device-tree
has been recommended by Stephen Warren.

Signed-off-by: Martin Sperl <kernel@martin.sperl.org>

Changelog:
	V1->V2: made an explicit bcm2835-aux-uart driver
		not conrolling the settings via DT only
	V2->V3: added comments on UART capabilities
		applied recommendations by Stefan Wahren
		keep registered line-id in bcm2835aux_data
Acked-by: Eric Anholt <eric@anholt.net>
---
 drivers/tty/serial/8250/8250_bcm2835aux.c | 146 ++++++++++++++++++++++++++++++
 drivers/tty/serial/8250/Kconfig           |  24 +++++
 drivers/tty/serial/8250/Makefile          |   1 +
 3 files changed, 171 insertions(+)
 create mode 100644 drivers/tty/serial/8250/8250_bcm2835aux.c

diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c
new file mode 100644
index 0000000..ecf89f1
--- /dev/null
+++ b/drivers/tty/serial/8250/8250_bcm2835aux.c
@@ -0,0 +1,146 @@
+/*
+ * Serial port driver for BCM2835AUX UART
+ *
+ * Copyright (C) 2016 Martin Sperl <kernel@martin.sperl.org>
+ *
+ * Based on 8250_lpc18xx.c:
+ * Copyright (C) 2015 Joachim Eastwood <manabian@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+#include "8250.h"
+
+struct bcm2835aux_data {
+	struct uart_8250_port uart;
+	struct clk *clk;
+	int line;
+};
+
+static int bcm2835aux_serial_probe(struct platform_device *pdev)
+{
+	struct bcm2835aux_data *data;
+	struct resource *res;
+	int ret;
+
+	/* allocate the custom structure */
+	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* initialize data */
+	spin_lock_init(&data->uart.port.lock);
+	data->uart.capabilities = UART_CAP_FIFO;
+	data->uart.port.dev = &pdev->dev;
+	data->uart.port.regshift = 2;
+	data->uart.port.type = PORT_16550;
+	data->uart.port.iotype = UPIO_MEM;
+	data->uart.port.fifosize = 8;
+	data->uart.port.flags = UPF_SHARE_IRQ |
+				UPF_FIXED_PORT |
+				UPF_FIXED_TYPE |
+				UPF_SKIP_TEST;
+
+	/* get the clock - this also enables the HW */
+	data->clk = devm_clk_get(&pdev->dev, NULL);
+	ret = PTR_ERR_OR_ZERO(data->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "could not get clk: %d\n", ret);
+		return ret;
+	}
+
+	/* get the interrupt */
+	data->uart.port.irq = platform_get_irq(pdev, 0);
+	if (data->uart.port.irq < 0) {
+		dev_err(&pdev->dev, "irq not found - %i",
+			data->uart.port.irq);
+		return data->uart.port.irq;
+	}
+
+	/* map the main registers */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "memory resource not found");
+		return -EINVAL;
+	}
+	data->uart.port.membase = devm_ioremap_resource(&pdev->dev, res);
+	ret = PTR_ERR_OR_ZERO(data->uart.port.membase);
+	if (ret)
+		return ret;
+
+	/* Check for a fixed line number */
+	ret = of_alias_get_id(pdev->dev.of_node, "serial");
+	if (ret >= 0)
+		data->uart.port.line = ret;
+
+	/* enable the clock as a last step */
+	ret = clk_prepare_enable(data->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "unable to enable uart clock - %d\n",
+			ret);
+		return ret;
+	}
+
+	/* the HW-clock divider for bcm2835aux is 8,
+	 * but 8250 expects a divider of 16,
+	 * so we have to multiply the actual clock by 2
+	 * to get identical baudrates.
+	 */
+	data->uart.port.uartclk = clk_get_rate(data->clk) * 2;
+
+	/* register the port */
+	ret = serial8250_register_8250_port(&data->uart);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "unable to register 8250 port - %d\n",
+			ret);
+		goto dis_clk;
+	}
+	data->line = ret;
+
+	platform_set_drvdata(pdev, data);
+
+	return 0;
+
+dis_clk:
+	clk_disable_unprepare(data->clk);
+	return ret;
+}
+
+static int bcm2835aux_serial_remove(struct platform_device *pdev)
+{
+	struct bcm2835aux_data *data = platform_get_drvdata(pdev);
+
+	serial8250_unregister_port(data->uart.port.line);
+	clk_disable_unprepare(data->clk);
+
+	return 0;
+}
+
+static const struct of_device_id bcm2835aux_serial_match[] = {
+	{ .compatible = "brcm,bcm2835-aux-uart" },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, bcm2835aux_serial_match);
+
+static struct platform_driver bcm2835aux_serial_driver = {
+	.driver = {
+		.name = "bcm2835-aux-uart",
+		.of_match_table = bcm2835aux_serial_match,
+	},
+	.probe  = bcm2835aux_serial_probe,
+	.remove = bcm2835aux_serial_remove,
+};
+module_platform_driver(bcm2835aux_serial_driver);
+
+MODULE_DESCRIPTION("BCM2835 auxiliar UART driver");
+MODULE_AUTHOR("Martin Sperl <kernel@martin.sperl.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index b03cb517..67ad6b0 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -272,6 +272,30 @@ config SERIAL_8250_ACORN
 	  system, say Y to this option.  The driver can handle 1, 2, or 3 port
 	  cards.  If unsure, say N.
 
+config SERIAL_8250_BCM2835AUX
+	tristate "BCM2835 auxiliar mini UART support"
+	depends on ARCH_BCM2835 || COMPILE_TEST
+	depends on SERIAL_8250 && SERIAL_8250_SHARE_IRQ
+	help
+	  Support for the BCM2835 auxiliar mini UART.
+
+	  Features and limitations of the UART are
+	    Registers are similar to 16650 registers,
+              set bits in the control registers that are unsupported
+	      are ignored and read back as 0
+	    7/8 bit operation with 1 start and 1 stop bit
+	    8 symbols deep fifo for rx and tx
+	    SW controlled RTS and SW readable CTS
+	    Clock rate derived from system clock
+	    Uses 8 times oversampling (compared to 16 times for 16650)
+	    Missing break detection (but break generation)
+	    Missing framing error detection
+	    Missing parity bit
+	    Missing receive time-out interrupt
+	    Missing DCD, DSR, DTR and RI signals
+
+	  If unsure, say N.
+
 config SERIAL_8250_FSL
 	bool
 	depends on SERIAL_8250_CONSOLE
diff --git a/drivers/tty/serial/8250/Makefile b/drivers/tty/serial/8250/Makefile
index b9b9bca..5c1869f 100644
--- a/drivers/tty/serial/8250/Makefile
+++ b/drivers/tty/serial/8250/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_SERIAL_8250_PCI)		+= 8250_pci.o
 obj-$(CONFIG_SERIAL_8250_HP300)		+= 8250_hp300.o
 obj-$(CONFIG_SERIAL_8250_CS)		+= serial_cs.o
 obj-$(CONFIG_SERIAL_8250_ACORN)		+= 8250_acorn.o
+obj-$(CONFIG_SERIAL_8250_BCM2835AUX)	+= 8250_bcm2835aux.o
 obj-$(CONFIG_SERIAL_8250_CONSOLE)	+= 8250_early.o
 obj-$(CONFIG_SERIAL_8250_FOURPORT)	+= 8250_fourport.o
 obj-$(CONFIG_SERIAL_8250_ACCENT)	+= 8250_accent.o
-- 
2.7.3

From 528285e99c25249456023d28f521689bf9e9eb8b Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Wed, 30 Mar 2016 09:35:13 +0100
Subject: [PATCH 32/36] drop usb power domain support for the moment, kills usb

---
 arch/arm/boot/dts/bcm2835-rpi.dtsi | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi
index caf2707..b1e8145 100644
--- a/arch/arm/boot/dts/bcm2835-rpi.dtsi
+++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi
@@ -71,10 +71,6 @@
 	status = "okay";
 };
 
-&usb {
-	power-domains = <&power RPI_POWER_DOMAIN_USB>;
-};
-
 &v3d {
 	power-domains = <&power RPI_POWER_DOMAIN_V3D>;
 };
-- 
2.7.3

From 6af83c5ff7f5514f32b1b3fa6d8d7dfe77e3acce Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sun, 17 Jan 2016 14:59:00 +0000
Subject: [PATCH 33/36] mmc: sdhci-iproc: Clean up platform allocations if
 shdci init fails

This patch adopts the changes from 475c9e43bfa7 ("mmc: sdhci-bcm2835:
Clean up platform allocations if sdhci init fails") to sdhci-iproc.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Scott Branden <sbranden@broadcom.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-iproc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 3b423b0..e22060a 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -213,7 +213,11 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 		host->caps1 = iproc_host->data->caps1;
 	}
 
-	return sdhci_add_host(host);
+	ret = sdhci_add_host(host);
+	if (ret)
+		goto err;
+
+	return 0;
 
 err:
 	sdhci_pltfm_free(pdev);
-- 
2.7.3

From 1565145761d5d94991e4763001c9e60c655818f1 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Sun, 17 Jan 2016 14:59:01 +0000
Subject: [PATCH 34/36] mmc: sdhci-iproc: Actually enable the clock

The RPi firmware-based clocks driver can actually disable
unused clocks, so when switching to use it we ended up losing
our MMC clock once all devices were probed.

This patch adopts the changes from 1e5a0a9a58e2 ("mmc: sdhci-bcm2835:
Actually enable the clock") to sdhci-iproc.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Scott Branden <sbranden@broadcom.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-iproc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index e22060a..55bc348 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -207,6 +207,11 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 		ret = PTR_ERR(pltfm_host->clk);
 		goto err;
 	}
+	ret = clk_prepare_enable(pltfm_host->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to enable host clk\n");
+		goto err;
+	}
 
 	if (iproc_host->data->pdata->quirks & SDHCI_QUIRK_MISSING_CAPS) {
 		host->caps = iproc_host->data->caps;
@@ -215,10 +220,12 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 
 	ret = sdhci_add_host(host);
 	if (ret)
-		goto err;
+		goto err_clk;
 
 	return 0;
 
+err_clk:
+	clk_disable_unprepare(pltfm_host->clk);
 err:
 	sdhci_pltfm_free(pdev);
 	return ret;
-- 
2.7.3

From 49ebf153a97a0840c1e54f934411aceb93bbdee4 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 27 Jan 2016 22:25:40 +0000
Subject: [PATCH 35/36] mmc: sdhci-iproc: define MMC caps in platform data

This patch moves the definition of the MMC capabilities
from the probe function into iproc platform data. After
that we are able to add support for another platform more
easily.

Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Suggested-by: Stephen Warren <swarren@wwwdotorg.org>
Acked-by: Scott Branden <sbranden@broadcom.com>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci-iproc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index 55bc348..cdc6c4a 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -26,6 +26,7 @@ struct sdhci_iproc_data {
 	const struct sdhci_pltfm_data *pdata;
 	u32 caps;
 	u32 caps1;
+	u32 mmc_caps;
 };
 
 struct sdhci_iproc_host {
@@ -165,6 +166,7 @@ static const struct sdhci_iproc_data iproc_data = {
 	.pdata = &sdhci_iproc_pltfm_data,
 	.caps = 0x05E90000,
 	.caps1 = 0x00000064,
+	.mmc_caps = MMC_CAP_1_8V_DDR,
 };
 
 static const struct of_device_id sdhci_iproc_of_match[] = {
@@ -199,8 +201,7 @@ static int sdhci_iproc_probe(struct platform_device *pdev)
 	mmc_of_parse(host->mmc);
 	sdhci_get_of_property(pdev);
 
-	/* Enable EMMC 1/8V DDR capable */
-	host->mmc->caps |= MMC_CAP_1_8V_DDR;
+	host->mmc->caps |= iproc_host->data->mmc_caps;
 
 	pltfm_host->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(pltfm_host->clk)) {
-- 
2.7.3

From 208897fb02fa78d06f960916bc3781c8a060ab72 Mon Sep 17 00:00:00 2001
From: Stefan Wahren <stefan.wahren@i2se.com>
Date: Wed, 27 Jan 2016 22:25:41 +0000
Subject: [PATCH 36/36] mmc: sdhci-iproc: add bcm2835 support

Scott Branden from Broadcom said that the BCM2835 eMMC IP core is
very similar to IPROC and share most of the quirks. So use this driver
instead of separate one.

The sdhci-iproc contains a better workaround for the clock domain
crossing problem which doesn't need any delays. This results in a
better write performance.

Btw we get the rid of the SDHCI_CAPABILITIES hack in the sdhci_readl
function.

Suggested-by: Scott Branden <sbranden@broadcom.com>
Signed-off-by: Stefan Wahren <stefan.wahren@i2se.com>
Acked-by: Eric Anholt <eric@anholt.net>
Acked-by: Scott Branden <sbranden@broadcom.com>
Acked-by: Stephen Warren <swarren@wwwdotorg.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/Kconfig       |  6 +++---
 drivers/mmc/host/sdhci-iproc.c | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 1526b8a..60de1e4 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -318,15 +318,15 @@ config MMC_SDHCI_F_SDH30
 	  If unsure, say N.
 
 config MMC_SDHCI_IPROC
-	tristate "SDHCI platform support for the iProc SD/MMC Controller"
-	depends on ARCH_BCM_IPROC || COMPILE_TEST
+	tristate "SDHCI support for the BCM2835 & iProc SD/MMC Controller"
+	depends on ARCH_BCM2835 || ARCH_BCM_IPROC || COMPILE_TEST
 	depends on MMC_SDHCI_PLTFM
 	default ARCH_BCM_IPROC
 	select MMC_SDHCI_IO_ACCESSORS
 	help
 	  This selects the iProc SD/MMC controller.
 
-	  If you have an IPROC platform with SD or MMC devices,
+	  If you have a BCM2835 or IPROC platform with SD or MMC devices,
 	  say Y or M here.
 
 	  If unsure, say N.
diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c
index cdc6c4a..871c92c 100644
--- a/drivers/mmc/host/sdhci-iproc.c
+++ b/drivers/mmc/host/sdhci-iproc.c
@@ -169,7 +169,22 @@ static const struct sdhci_iproc_data iproc_data = {
 	.mmc_caps = MMC_CAP_1_8V_DDR,
 };
 
+static const struct sdhci_pltfm_data sdhci_bcm2835_pltfm_data = {
+	.quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION |
+		  SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
+		  SDHCI_QUIRK_MISSING_CAPS,
+	.ops = &sdhci_iproc_ops,
+};
+
+static const struct sdhci_iproc_data bcm2835_data = {
+	.pdata = &sdhci_bcm2835_pltfm_data,
+	.caps = SDHCI_CAN_VDD_330,
+	.caps1 = 0x00000000,
+	.mmc_caps = 0x00000000,
+};
+
 static const struct of_device_id sdhci_iproc_of_match[] = {
+	{ .compatible = "brcm,bcm2835-sdhci", .data = &bcm2835_data },
 	{ .compatible = "brcm,sdhci-iproc-cygnus", .data = &iproc_data },
 	{ }
 };
-- 
2.7.3