diff options
author | Maarten Lankhorst | 2019-06-26 12:22:54 +0200 |
---|---|---|
committer | Maarten Lankhorst | 2019-06-26 12:22:54 +0200 |
commit | 355a47ae7ebcf9d605aa809b259d380422e81b8d (patch) | |
tree | f42b3196986c7de89a335b4f57384d249f5663e9 /drivers/gpu/drm/amd/amdkfd | |
parent | drm/bridge: tfp410: fix use of cancel_delayed_work_sync (diff) | |
parent | drm/edid: use for_each_displayid_db where applicable (diff) | |
download | kernel-qcow2-linux-355a47ae7ebcf9d605aa809b259d380422e81b8d.tar.gz kernel-qcow2-linux-355a47ae7ebcf9d605aa809b259d380422e81b8d.tar.xz kernel-qcow2-linux-355a47ae7ebcf9d605aa809b259d380422e81b8d.zip |
Merge remote-tracking branch 'drm/drm-next' into drm-misc-next-fixes
Some fixes have been accidentally pushed to this, so I cannot fost-forward.
Required to pull in the remove-fbcon-notifiers fixes.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
30 files changed, 991 insertions, 572 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index c3613604a4f8..a1a35d4d594b 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only # # Heterogenous system architecture configuration # diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h index 3621efbd5759..e413d4a71fa3 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h @@ -21,7 +21,7 @@ */ static const uint32_t cwsr_trap_gfx8_hex[] = { - 0xbf820001, 0xbf82012b, + 0xbf820001, 0xbf820121, 0xb8f4f802, 0x89748674, 0xb8f5f803, 0x8675ff75, 0x00000400, 0xbf850017, @@ -36,12 +36,7 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { 0x8671ff71, 0x0000ffff, 0x8f728374, 0xb972e0c2, 0xbf800002, 0xb9740002, - 0xbe801f70, 0xb8f5f803, - 0x8675ff75, 0x00000100, - 0xbf840006, 0xbefa0080, - 0xb97a0203, 0x8671ff71, - 0x0000ffff, 0x80f08870, - 0x82f18071, 0xbefa0080, + 0xbe801f70, 0xbefa0080, 0xb97a0283, 0xbef60068, 0xbef70069, 0xb8fa1c07, 0x8e7a9c7a, 0x87717a71, @@ -279,15 +274,17 @@ static const uint32_t cwsr_trap_gfx8_hex[] = { static const uint32_t cwsr_trap_gfx9_hex[] = { - 0xbf820001, 0xbf82015d, + 0xbf820001, 0xbf82015e, 0xb8f8f802, 0x89788678, - 0xb8f1f803, 0x866eff71, - 0x00000400, 0xbf850037, - 0x866eff71, 0x00000800, - 0xbf850003, 0x866eff71, - 0x00000100, 0xbf840008, + 0xb8fbf803, 0x866eff7b, + 0x00000400, 0xbf85003b, + 0x866eff7b, 0x00000800, + 0xbf850003, 0x866eff7b, + 0x00000100, 0xbf84000c, 0x866eff78, 0x00002000, - 0xbf840001, 0xbf810000, + 0xbf840005, 0xbf8e0010, + 0xb8eef803, 0x866eff6e, + 0x00000400, 0xbf84fffb, 0x8778ff78, 0x00002000, 0x80ec886c, 0x82ed806d, 0xb8eef807, 0x866fff6e, @@ -295,13 +292,13 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8977ff77, 0xfc000000, 0x87776f77, 0x896eff6e, 0x001f8000, 0xb96ef807, - 0xb8f0f812, 0xb8f1f813, - 0x8ef08870, 0xc0071bb8, + 0xb8faf812, 0xb8fbf813, + 0x8efa887a, 0xc0071bbd, 0x00000000, 0xbf8cc07f, - 0xc0071c38, 0x00000008, + 0xc0071ebd, 0x00000008, 0xbf8cc07f, 0x86ee6e6e, 0xbf840001, 0xbe801d6e, - 0xb8f1f803, 0x8671ff71, + 0xb8fbf803, 0x867bff7b, 0x000001ff, 0xbf850002, 0x806c846c, 0x826d806d, 0x866dff6d, 0x0000ffff, @@ -311,258 +308,256 @@ static const uint32_t cwsr_trap_gfx9_hex[] = { 0x8f6e8378, 0xb96ee0c2, 0xbf800002, 0xb9780002, 0xbe801f6c, 0x866dff6d, - 0x0000ffff, 0xbef00080, - 0xb9700283, 0xb8f02407, - 0x8e709c70, 0x876d706d, - 0xb8f003c7, 0x8e709b70, - 0x876d706d, 0xb8f0f807, - 0x8670ff70, 0x00007fff, - 0xb970f807, 0xbeee007e, + 0x0000ffff, 0xbefa0080, + 0xb97a0283, 0xb8fa2407, + 0x8e7a9b7a, 0x876d7a6d, + 0xb8fa03c7, 0x8e7a9a7a, + 0x876d7a6d, 0xb8faf807, + 0x867aff7a, 0x00007fff, + 0xb97af807, 0xbeee007e, 0xbeef007f, 0xbefe0180, - 0xbf900004, 0x87708478, - 0xb970f802, 0xbf8e0002, - 0xbf88fffe, 0xb8f02a05, + 0xbf900004, 0x877a8478, + 0xb97af802, 0xbf8e0002, + 0xbf88fffe, 0xb8fa2a05, + 0x807a817a, 0x8e7a8a7a, + 0xb8fb1605, 0x807b817b, + 0x8e7b867b, 0x807a7b7a, + 0x807a7e7a, 0x827b807f, + 0x867bff7b, 0x0000ffff, + 0xc04b1c3d, 0x00000050, + 0xbf8cc07f, 0xc04b1d3d, + 0x00000060, 0xbf8cc07f, + 0xc0431e7d, 0x00000074, + 0xbf8cc07f, 0xbef4007e, + 0x8675ff7f, 0x0000ffff, + 0x8775ff75, 0x00040000, + 0xbef60080, 0xbef700ff, + 0x00807fac, 0x867aff7f, + 0x08000000, 0x8f7a837a, + 0x87777a77, 0x867aff7f, + 0x70000000, 0x8f7a817a, + 0x87777a77, 0xbef1007c, + 0xbef00080, 0xb8f02a05, 0x80708170, 0x8e708a70, - 0xb8f11605, 0x80718171, - 0x8e718671, 0x80707170, - 0x80707e70, 0x8271807f, - 0x8671ff71, 0x0000ffff, - 0xc0471cb8, 0x00000040, - 0xbf8cc07f, 0xc04b1d38, - 0x00000048, 0xbf8cc07f, - 0xc0431e78, 0x00000058, - 0xbf8cc07f, 0xc0471eb8, - 0x0000005c, 0xbf8cc07f, - 0xbef4007e, 0x8675ff7f, - 0x0000ffff, 0x8775ff75, - 0x00040000, 0xbef60080, - 0xbef700ff, 0x00807fac, - 0x8670ff7f, 0x08000000, - 0x8f708370, 0x87777077, - 0x8670ff7f, 0x70000000, - 0x8f708170, 0x87777077, - 0xbefb007c, 0xbefa0080, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f01605, - 0x80708170, 0x8e708670, - 0x807a707a, 0xbef60084, - 0xbef600ff, 0x01000000, - 0xbefe007c, 0xbefc007a, - 0xc0611efa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, - 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611b3a, + 0xb8fa1605, 0x807a817a, + 0x8e7a867a, 0x80707a70, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xbefe007c, + 0xbefc0070, 0xc0611c7a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611b7a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611b3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611bba, + 0xbefc0070, 0xc0611b7a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611bfa, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611bba, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611e3a, + 0xbefc0070, 0xc0611bfa, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8f1f803, 0xbefe007c, - 0xbefc007a, 0xc0611c7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xbefe007c, 0xbefc007a, - 0xc0611a3a, 0x0000007c, - 0xbf8cc07f, 0x807a847a, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611e3a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8fbf803, + 0xbefe007c, 0xbefc0070, + 0xc0611efa, 0x0000007c, + 0xbf8cc07f, 0x80708470, 0xbefc007e, 0xbefe007c, - 0xbefc007a, 0xc0611a7a, - 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0xb8fbf801, 0xbefe007c, - 0xbefc007a, 0xc0611efa, + 0xbefc0070, 0xc0611a3a, 0x0000007c, 0xbf8cc07f, - 0x807a847a, 0xbefc007e, - 0x8670ff7f, 0x04000000, - 0xbeef0080, 0x876f6f70, - 0xb8fa2a05, 0x807a817a, - 0x8e7a8a7a, 0xb8f11605, - 0x80718171, 0x8e718471, - 0x8e768271, 0xbef600ff, - 0x01000000, 0xbef20174, - 0x80747a74, 0x82758075, - 0xbefc0080, 0xbf800000, - 0xbe802b00, 0xbe822b02, - 0xbe842b04, 0xbe862b06, - 0xbe882b08, 0xbe8a2b0a, - 0xbe8c2b0c, 0xbe8e2b0e, - 0xc06b003a, 0x00000000, - 0xbf8cc07f, 0xc06b013a, - 0x00000010, 0xbf8cc07f, - 0xc06b023a, 0x00000020, - 0xbf8cc07f, 0xc06b033a, - 0x00000030, 0xbf8cc07f, - 0x8074c074, 0x82758075, - 0x807c907c, 0xbf0a717c, - 0xbf85ffe7, 0xbef40172, - 0xbefa0080, 0xbefe00c1, - 0xbeff00c1, 0xbee80080, - 0xbee90080, 0xbef600ff, - 0x01000000, 0xe0724000, - 0x7a1d0000, 0xe0724100, - 0x7a1d0100, 0xe0724200, - 0x7a1d0200, 0xe0724300, - 0x7a1d0300, 0xbefe00c1, - 0xbeff00c1, 0xb8f14306, - 0x8671c171, 0xbf84002c, - 0xbf8a0000, 0x8670ff6f, - 0x04000000, 0xbf840028, - 0x8e718671, 0x8e718271, - 0xbef60071, 0xb8fa2a05, - 0x807a817a, 0x8e7a8a7a, - 0xb8f01605, 0x80708170, - 0x8e708670, 0x807a707a, - 0x807aff7a, 0x00000080, + 0x80708470, 0xbefc007e, + 0xbefe007c, 0xbefc0070, + 0xc0611a7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0xb8f1f801, + 0xbefe007c, 0xbefc0070, + 0xc0611c7a, 0x0000007c, + 0xbf8cc07f, 0x80708470, + 0xbefc007e, 0x867aff7f, + 0x04000000, 0xbeef0080, + 0x876f6f7a, 0xb8f02a05, + 0x80708170, 0x8e708a70, + 0xb8fb1605, 0x807b817b, + 0x8e7b847b, 0x8e76827b, 0xbef600ff, 0x01000000, - 0xbefc0080, 0xd28c0002, - 0x000100c1, 0xd28d0003, - 0x000204c1, 0xd1060002, - 0x00011103, 0x7e0602ff, - 0x00000200, 0xbefc00ff, - 0x00010000, 0xbe800077, - 0x8677ff77, 0xff7fffff, - 0x8777ff77, 0x00058000, - 0xd8ec0000, 0x00000002, - 0xbf8cc07f, 0xe0765000, - 0x7a1d0002, 0x68040702, - 0xd0c9006a, 0x0000e302, - 0xbf87fff7, 0xbef70000, - 0xbefa00ff, 0x00000400, + 0xbef20174, 0x80747074, + 0x82758075, 0xbefc0080, + 0xbf800000, 0xbe802b00, + 0xbe822b02, 0xbe842b04, + 0xbe862b06, 0xbe882b08, + 0xbe8a2b0a, 0xbe8c2b0c, + 0xbe8e2b0e, 0xc06b003a, + 0x00000000, 0xbf8cc07f, + 0xc06b013a, 0x00000010, + 0xbf8cc07f, 0xc06b023a, + 0x00000020, 0xbf8cc07f, + 0xc06b033a, 0x00000030, + 0xbf8cc07f, 0x8074c074, + 0x82758075, 0x807c907c, + 0xbf0a7b7c, 0xbf85ffe7, + 0xbef40172, 0xbef00080, 0xbefe00c1, 0xbeff00c1, - 0xb8f12a05, 0x80718171, - 0x8e718271, 0x8e768871, + 0xbee80080, 0xbee90080, 0xbef600ff, 0x01000000, - 0xbefc0084, 0xbf0a717c, - 0xbf840015, 0xbf11017c, - 0x8071ff71, 0x00001000, - 0x7e000300, 0x7e020301, - 0x7e040302, 0x7e060303, - 0xe0724000, 0x7a1d0000, - 0xe0724100, 0x7a1d0100, - 0xe0724200, 0x7a1d0200, - 0xe0724300, 0x7a1d0300, - 0x807c847c, 0x807aff7a, - 0x00000400, 0xbf0a717c, - 0xbf85ffef, 0xbf9c0000, - 0xbf8200dc, 0xbef4007e, - 0x8675ff7f, 0x0000ffff, - 0x8775ff75, 0x00040000, - 0xbef60080, 0xbef700ff, - 0x00807fac, 0x866eff7f, - 0x08000000, 0x8f6e836e, - 0x87776e77, 0x866eff7f, - 0x70000000, 0x8f6e816e, - 0x87776e77, 0x866eff7f, - 0x04000000, 0xbf84001e, + 0xe0724000, 0x701d0000, + 0xe0724100, 0x701d0100, + 0xe0724200, 0x701d0200, + 0xe0724300, 0x701d0300, 0xbefe00c1, 0xbeff00c1, - 0xb8ef4306, 0x866fc16f, - 0xbf840019, 0x8e6f866f, - 0x8e6f826f, 0xbef6006f, - 0xb8f82a05, 0x80788178, - 0x8e788a78, 0xb8ee1605, - 0x806e816e, 0x8e6e866e, - 0x80786e78, 0x8078ff78, + 0xb8fb4306, 0x867bc17b, + 0xbf84002c, 0xbf8a0000, + 0x867aff6f, 0x04000000, + 0xbf840028, 0x8e7b867b, + 0x8e7b827b, 0xbef6007b, + 0xb8f02a05, 0x80708170, + 0x8e708a70, 0xb8fa1605, + 0x807a817a, 0x8e7a867a, + 0x80707a70, 0x8070ff70, 0x00000080, 0xbef600ff, 0x01000000, 0xbefc0080, - 0xe0510000, 0x781d0000, - 0xe0510100, 0x781d0000, - 0x807cff7c, 0x00000200, - 0x8078ff78, 0x00000200, - 0xbf0a6f7c, 0xbf85fff6, - 0xbef80080, 0xbefe00c1, - 0xbeff00c1, 0xb8ef2a05, - 0x806f816f, 0x8e6f826f, - 0x8e76886f, 0xbef600ff, - 0x01000000, 0xbeee0078, - 0x8078ff78, 0x00000400, - 0xbefc0084, 0xbf11087c, - 0x806fff6f, 0x00008000, - 0xe0524000, 0x781d0000, - 0xe0524100, 0x781d0100, - 0xe0524200, 0x781d0200, - 0xe0524300, 0x781d0300, - 0xbf8c0f70, 0x7e000300, + 0xd28c0002, 0x000100c1, + 0xd28d0003, 0x000204c1, + 0xd1060002, 0x00011103, + 0x7e0602ff, 0x00000200, + 0xbefc00ff, 0x00010000, + 0xbe800077, 0x8677ff77, + 0xff7fffff, 0x8777ff77, + 0x00058000, 0xd8ec0000, + 0x00000002, 0xbf8cc07f, + 0xe0765000, 0x701d0002, + 0x68040702, 0xd0c9006a, + 0x0000f702, 0xbf87fff7, + 0xbef70000, 0xbef000ff, + 0x00000400, 0xbefe00c1, + 0xbeff00c1, 0xb8fb2a05, + 0x807b817b, 0x8e7b827b, + 0x8e76887b, 0xbef600ff, + 0x01000000, 0xbefc0084, + 0xbf0a7b7c, 0xbf840015, + 0xbf11017c, 0x807bff7b, + 0x00001000, 0x7e000300, 0x7e020301, 0x7e040302, - 0x7e060303, 0x807c847c, - 0x8078ff78, 0x00000400, - 0xbf0a6f7c, 0xbf85ffee, - 0xbf9c0000, 0xe0524000, - 0x6e1d0000, 0xe0524100, - 0x6e1d0100, 0xe0524200, - 0x6e1d0200, 0xe0524300, - 0x6e1d0300, 0xb8f82a05, + 0x7e060303, 0xe0724000, + 0x701d0000, 0xe0724100, + 0x701d0100, 0xe0724200, + 0x701d0200, 0xe0724300, + 0x701d0300, 0x807c847c, + 0x8070ff70, 0x00000400, + 0xbf0a7b7c, 0xbf85ffef, + 0xbf9c0000, 0xbf8200da, + 0xbef4007e, 0x8675ff7f, + 0x0000ffff, 0x8775ff75, + 0x00040000, 0xbef60080, + 0xbef700ff, 0x00807fac, + 0x866eff7f, 0x08000000, + 0x8f6e836e, 0x87776e77, + 0x866eff7f, 0x70000000, + 0x8f6e816e, 0x87776e77, + 0x866eff7f, 0x04000000, + 0xbf84001e, 0xbefe00c1, + 0xbeff00c1, 0xb8ef4306, + 0x866fc16f, 0xbf840019, + 0x8e6f866f, 0x8e6f826f, + 0xbef6006f, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, 0x80786e78, - 0x80f8c078, 0xb8ef1605, - 0x806f816f, 0x8e6f846f, - 0x8e76826f, 0xbef600ff, - 0x01000000, 0xbefc006f, - 0xc031003a, 0x00000078, - 0x80f8c078, 0xbf8cc07f, - 0x80fc907c, 0xbf800000, - 0xbe802d00, 0xbe822d02, - 0xbe842d04, 0xbe862d06, - 0xbe882d08, 0xbe8a2d0a, - 0xbe8c2d0c, 0xbe8e2d0e, - 0xbf06807c, 0xbf84fff0, + 0x8078ff78, 0x00000080, + 0xbef600ff, 0x01000000, + 0xbefc0080, 0xe0510000, + 0x781d0000, 0xe0510100, + 0x781d0000, 0x807cff7c, + 0x00000200, 0x8078ff78, + 0x00000200, 0xbf0a6f7c, + 0xbf85fff6, 0xbef80080, + 0xbefe00c1, 0xbeff00c1, + 0xb8ef2a05, 0x806f816f, + 0x8e6f826f, 0x8e76886f, + 0xbef600ff, 0x01000000, + 0xbeee0078, 0x8078ff78, + 0x00000400, 0xbefc0084, + 0xbf11087c, 0x806fff6f, + 0x00008000, 0xe0524000, + 0x781d0000, 0xe0524100, + 0x781d0100, 0xe0524200, + 0x781d0200, 0xe0524300, + 0x781d0300, 0xbf8c0f70, + 0x7e000300, 0x7e020301, + 0x7e040302, 0x7e060303, + 0x807c847c, 0x8078ff78, + 0x00000400, 0xbf0a6f7c, + 0xbf85ffee, 0xbf9c0000, + 0xe0524000, 0x6e1d0000, + 0xe0524100, 0x6e1d0100, + 0xe0524200, 0x6e1d0200, + 0xe0524300, 0x6e1d0300, 0xb8f82a05, 0x80788178, 0x8e788a78, 0xb8ee1605, 0x806e816e, 0x8e6e866e, - 0x80786e78, 0xbef60084, + 0x80786e78, 0x80f8c078, + 0xb8ef1605, 0x806f816f, + 0x8e6f846f, 0x8e76826f, 0xbef600ff, 0x01000000, - 0xc0211bfa, 0x00000078, - 0x80788478, 0xc0211b3a, + 0xbefc006f, 0xc031003a, + 0x00000078, 0x80f8c078, + 0xbf8cc07f, 0x80fc907c, + 0xbf800000, 0xbe802d00, + 0xbe822d02, 0xbe842d04, + 0xbe862d06, 0xbe882d08, + 0xbe8a2d0a, 0xbe8c2d0c, + 0xbe8e2d0e, 0xbf06807c, + 0xbf84fff0, 0xb8f82a05, + 0x80788178, 0x8e788a78, + 0xb8ee1605, 0x806e816e, + 0x8e6e866e, 0x80786e78, + 0xbef60084, 0xbef600ff, + 0x01000000, 0xc0211bfa, 0x00000078, 0x80788478, - 0xc0211b7a, 0x00000078, - 0x80788478, 0xc0211eba, + 0xc0211b3a, 0x00000078, + 0x80788478, 0xc0211b7a, 0x00000078, 0x80788478, - 0xc0211efa, 0x00000078, - 0x80788478, 0xc0211c3a, + 0xc0211c3a, 0x00000078, + 0x80788478, 0xc0211c7a, 0x00000078, 0x80788478, - 0xc0211c7a, 0x00000078, - 0x80788478, 0xc0211a3a, + 0xc0211eba, 0x00000078, + 0x80788478, 0xc0211efa, 0x00000078, 0x80788478, - 0xc0211a7a, 0x00000078, - 0x80788478, 0xc0211cfa, + 0xc0211a3a, 0x00000078, + 0x80788478, 0xc0211a7a, 0x00000078, 0x80788478, - 0xbf8cc07f, 0xbefc006f, - 0xbefe007a, 0xbeff007b, - 0x866f71ff, 0x000003ff, - 0xb96f4803, 0x866f71ff, - 0xfffff800, 0x8f6f8b6f, - 0xb96fa2c3, 0xb973f801, - 0xb8ee2a05, 0x806e816e, - 0x8e6e8a6e, 0xb8ef1605, - 0x806f816f, 0x8e6f866f, - 0x806e6f6e, 0x806e746e, - 0x826f8075, 0x866fff6f, - 0x0000ffff, 0xc0071cb7, - 0x00000040, 0xc00b1d37, - 0x00000048, 0xc0031e77, - 0x00000058, 0xc0071eb7, - 0x0000005c, 0xbf8cc07f, - 0x866fff6d, 0xf0000000, - 0x8f6f9c6f, 0x8e6f906f, - 0xbeee0080, 0x876e6f6e, - 0x866fff6d, 0x08000000, - 0x8f6f9b6f, 0x8e6f8f6f, - 0x876e6f6e, 0x866fff70, - 0x00800000, 0x8f6f976f, - 0xb96ef807, 0x866dff6d, - 0x0000ffff, 0x86fe7e7e, - 0x86ea6a6a, 0x8f6e8370, - 0xb96ee0c2, 0xbf800002, - 0xb9700002, 0xbf8a0000, - 0x95806f6c, 0xbf810000, + 0xc0211cfa, 0x00000078, + 0x80788478, 0xbf8cc07f, + 0xbefc006f, 0xbefe0070, + 0xbeff0071, 0x866f7bff, + 0x000003ff, 0xb96f4803, + 0x866f7bff, 0xfffff800, + 0x8f6f8b6f, 0xb96fa2c3, + 0xb973f801, 0xb8ee2a05, + 0x806e816e, 0x8e6e8a6e, + 0xb8ef1605, 0x806f816f, + 0x8e6f866f, 0x806e6f6e, + 0x806e746e, 0x826f8075, + 0x866fff6f, 0x0000ffff, + 0xc00b1c37, 0x00000050, + 0xc00b1d37, 0x00000060, + 0xc0031e77, 0x00000074, + 0xbf8cc07f, 0x866fff6d, + 0xf8000000, 0x8f6f9b6f, + 0x8e6f906f, 0xbeee0080, + 0x876e6f6e, 0x866fff6d, + 0x04000000, 0x8f6f9a6f, + 0x8e6f8f6f, 0x876e6f6e, + 0x866fff7a, 0x00800000, + 0x8f6f976f, 0xb96ef807, + 0x866dff6d, 0x0000ffff, + 0x86fe7e7e, 0x86ea6a6a, + 0x8f6e837a, 0xb96ee0c2, + 0xbf800002, 0xb97a0002, + 0xbf8a0000, 0x95806f6c, + 0xbf810000, 0x00000000, }; diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm index abe1a5da29fb..a47f5b933120 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx8.asm @@ -282,19 +282,6 @@ if G8SR_DEBUG_TIMESTAMP s_waitcnt lgkmcnt(0) //FIXME, will cause xnack?? end - //check whether there is mem_viol - s_getreg_b32 s_save_trapsts, hwreg(HW_REG_TRAPSTS) - s_and_b32 s_save_trapsts, s_save_trapsts, SQ_WAVE_TRAPSTS_MEM_VIOL_MASK - s_cbranch_scc0 L_NO_PC_REWIND - - //if so, need rewind PC assuming GDS operation gets NACKed - s_mov_b32 s_save_tmp, 0 //clear mem_viol bit - s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_MEM_VIOL_SHIFT, 1), s_save_tmp //clear mem_viol bit - s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32] - s_sub_u32 s_save_pc_lo, s_save_pc_lo, 8 //pc[31:0]-8 - s_subb_u32 s_save_pc_hi, s_save_pc_hi, 0x0 // -scc - -L_NO_PC_REWIND: s_mov_b32 s_save_tmp, 0 //clear saveCtx bit s_setreg_b32 hwreg(HW_REG_TRAPSTS, SQ_WAVE_TRAPSTS_SAVECTX_SHIFT, 1), s_save_tmp //clear saveCtx bit diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm index 0bb9c577b3a2..6bae2e022c6e 100644 --- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm +++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx9.asm @@ -150,10 +150,10 @@ var S_SAVE_SPI_INIT_MTYPE_SHIFT = 28 var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000 //bit[26]: FirstWaveInTG var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26 -var S_SAVE_PC_HI_RCNT_SHIFT = 28 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used -var S_SAVE_PC_HI_RCNT_MASK = 0xF0000000 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 27 //FIXME -var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x08000000 //FIXME +var S_SAVE_PC_HI_RCNT_SHIFT = 27 //FIXME check with Brian to ensure all fields other than PC[47:0] can be used +var S_SAVE_PC_HI_RCNT_MASK = 0xF8000000 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_SHIFT = 26 //FIXME +var S_SAVE_PC_HI_FIRST_REPLAY_MASK = 0x04000000 //FIXME var s_save_spi_init_lo = exec_lo var s_save_spi_init_hi = exec_hi @@ -162,8 +162,8 @@ var s_save_pc_lo = ttmp0 //{TTMP1, TTMP0} = {3'h0,pc_rewind[3:0], HT[0],tra var s_save_pc_hi = ttmp1 var s_save_exec_lo = ttmp2 var s_save_exec_hi = ttmp3 -var s_save_tmp = ttmp4 -var s_save_trapsts = ttmp5 //not really used until the end of the SAVE routine +var s_save_tmp = ttmp14 +var s_save_trapsts = ttmp15 //not really used until the end of the SAVE routine var s_save_xnack_mask_lo = ttmp6 var s_save_xnack_mask_hi = ttmp7 var s_save_buf_rsrc0 = ttmp8 @@ -171,9 +171,9 @@ var s_save_buf_rsrc1 = ttmp9 var s_save_buf_rsrc2 = ttmp10 var s_save_buf_rsrc3 = ttmp11 var s_save_status = ttmp12 -var s_save_mem_offset = ttmp14 +var s_save_mem_offset = ttmp4 var s_save_alloc_size = s_save_trapsts //conflict -var s_save_m0 = ttmp15 +var s_save_m0 = ttmp5 var s_save_ttmps_lo = s_save_tmp //no conflict var s_save_ttmps_hi = s_save_trapsts //no conflict @@ -207,10 +207,10 @@ var s_restore_mode = ttmp7 var s_restore_pc_lo = ttmp0 var s_restore_pc_hi = ttmp1 -var s_restore_exec_lo = ttmp14 -var s_restore_exec_hi = ttmp15 -var s_restore_status = ttmp4 -var s_restore_trapsts = ttmp5 +var s_restore_exec_lo = ttmp4 +var s_restore_exec_hi = ttmp5 +var s_restore_status = ttmp14 +var s_restore_trapsts = ttmp15 var s_restore_xnack_mask_lo = xnack_mask_lo var s_restore_xnack_mask_hi = xnack_mask_hi var s_restore_buf_rsrc0 = ttmp8 @@ -266,10 +266,16 @@ if (!EMU_RUN_HACK) L_HALT_WAVE: // If STATUS.HALT is set then this fault must come from SQC instruction fetch. - // We cannot prevent further faults so just terminate the wavefront. + // We cannot prevent further faults. Spin wait until context saved. s_and_b32 ttmp2, s_save_status, SQ_WAVE_STATUS_HALT_MASK s_cbranch_scc0 L_NOT_ALREADY_HALTED - s_endpgm + +L_WAIT_CTX_SAVE: + s_sleep 0x10 + s_getreg_b32 ttmp2, hwreg(HW_REG_TRAPSTS) + s_and_b32 ttmp2, ttmp2, SQ_WAVE_TRAPSTS_SAVECTX_MASK + s_cbranch_scc0 L_WAIT_CTX_SAVE + L_NOT_ALREADY_HALTED: s_or_b32 s_save_status, s_save_status, SQ_WAVE_STATUS_HALT_MASK @@ -293,12 +299,12 @@ L_FETCH_2ND_TRAP: // Read second-level TBA/TMA from first-level TMA and jump if available. // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data) // ttmp12 holds SQ_WAVE_STATUS - s_getreg_b32 ttmp4, hwreg(HW_REG_SQ_SHADER_TMA_LO) - s_getreg_b32 ttmp5, hwreg(HW_REG_SQ_SHADER_TMA_HI) - s_lshl_b64 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 - s_load_dwordx2 [ttmp2, ttmp3], [ttmp4, ttmp5], 0x0 glc:1 // second-level TBA + s_getreg_b32 ttmp14, hwreg(HW_REG_SQ_SHADER_TMA_LO) + s_getreg_b32 ttmp15, hwreg(HW_REG_SQ_SHADER_TMA_HI) + s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 + s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 glc:1 // second-level TBA s_waitcnt lgkmcnt(0) - s_load_dwordx2 [ttmp4, ttmp5], [ttmp4, ttmp5], 0x8 glc:1 // second-level TMA + s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 glc:1 // second-level TMA s_waitcnt lgkmcnt(0) s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3] s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler not been set @@ -405,7 +411,7 @@ end else end - // Save trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_save_ttmps_lo) get_sgpr_size_bytes(s_save_ttmps_hi) @@ -413,13 +419,11 @@ end s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo s_addc_u32 s_save_ttmps_hi, s_save_spi_init_hi, 0x0 s_and_b32 s_save_ttmps_hi, s_save_ttmps_hi, 0xFFFF - s_store_dwordx2 [ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x40 glc:1 - ack_sqc_store_workaround() - s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x48 glc:1 + s_store_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_save_ttmps_lo, s_save_ttmps_hi], 0x50 glc:1 ack_sqc_store_workaround() - s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x58 glc:1 + s_store_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_save_ttmps_lo, s_save_ttmps_hi], 0x60 glc:1 ack_sqc_store_workaround() - s_store_dwordx2 [ttmp14, ttmp15], [s_save_ttmps_lo, s_save_ttmps_hi], 0x5C glc:1 + s_store_dword ttmp13, [s_save_ttmps_lo, s_save_ttmps_hi], 0x74 glc:1 ack_sqc_store_workaround() /* setup Resource Contants */ @@ -1093,7 +1097,7 @@ end //s_setreg_b32 hwreg(HW_REG_TRAPSTS), s_restore_trapsts //don't overwrite SAVECTX bit as it may be set through external SAVECTX during restore s_setreg_b32 hwreg(HW_REG_MODE), s_restore_mode - // Restore trap temporaries 6-11, 13-15 initialized by SPI debug dispatch logic + // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic // ttmp SR memory offset : size(VGPR)+size(SGPR)+0x40 get_vgpr_size_bytes(s_restore_ttmps_lo) get_sgpr_size_bytes(s_restore_ttmps_hi) @@ -1101,10 +1105,9 @@ end s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0 s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0 s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF - s_load_dwordx2 [ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x40 glc:1 - s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x48 glc:1 - s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x58 glc:1 - s_load_dwordx2 [ttmp14, ttmp15], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x5C glc:1 + s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 glc:1 + s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 glc:1 + s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 glc:1 s_waitcnt lgkmcnt(0) //reuse s_restore_m0 as a temp register diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 083bd8114db1..f91126f5f1be 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -213,6 +213,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_COMPUTE; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) q_properties->type = KFD_QUEUE_TYPE_SDMA; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) + q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; else return -ENOTSUPP; @@ -522,7 +524,7 @@ static int kfd_ioctl_set_trap_handler(struct file *filep, struct kfd_process_device *pdd; dev = kfd_device_by_id(args->gpu_id); - if (dev == NULL) + if (!dev) return -EINVAL; mutex_lock(&p->mutex); @@ -1272,6 +1274,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, if (args->size != kfd_doorbell_process_slice(dev)) return -EINVAL; offset = kfd_get_process_doorbells(dev, p); + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + if (args->size != PAGE_SIZE) + return -EINVAL; + offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + if (!offset) + return -ENOMEM; } mutex_lock(&p->mutex); @@ -1301,6 +1309,14 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, args->handle = MAKE_HANDLE(args->gpu_id, idr_handle); args->mmap_offset = offset; + /* MMIO is mapped through kfd device + * Generate a kfd mmap offset + */ + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { + args->mmap_offset = KFD_MMAP_TYPE_MMIO | KFD_MMAP_GPU_ID(args->gpu_id); + args->mmap_offset <<= PAGE_SHIFT; + } + return 0; err_free: @@ -1551,6 +1567,32 @@ copy_from_user_failed: return err; } +static int kfd_ioctl_alloc_queue_gws(struct file *filep, + struct kfd_process *p, void *data) +{ + int retval; + struct kfd_ioctl_alloc_queue_gws_args *args = data; + struct kfd_dev *dev; + + if (!hws_gws_support) + return -ENODEV; + + dev = kfd_device_by_id(args->gpu_id); + if (!dev) { + pr_debug("Could not find gpu id 0x%x\n", args->gpu_id); + return -ENODEV; + } + if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) + return -ENODEV; + + mutex_lock(&p->mutex); + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); + mutex_unlock(&p->mutex); + + args->first_gws = 0; + return retval; +} + static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { @@ -1753,6 +1795,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF, kfd_ioctl_import_dmabuf, 0), + AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS, + kfd_ioctl_alloc_queue_gws, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) @@ -1845,6 +1889,39 @@ err_i1: return retcode; } +static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, + struct vm_area_struct *vma) +{ + phys_addr_t address; + int ret; + + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + address = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd); + + vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | + VM_DONTDUMP | VM_PFNMAP; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + pr_debug("Process %d mapping mmio page\n" + " target user address == 0x%08llX\n" + " physical address == 0x%08llX\n" + " vm_flags == 0x%04lX\n" + " size == 0x%04lX\n", + process->pasid, (unsigned long long) vma->vm_start, + address, vma->vm_flags, PAGE_SIZE); + + ret = io_remap_pfn_range(vma, + vma->vm_start, + address >> PAGE_SHIFT, + PAGE_SIZE, + vma->vm_page_prot); + return ret; +} + + static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; @@ -1875,6 +1952,10 @@ static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) if (!dev) return -ENODEV; return kfd_reserved_mem_mmap(dev, process, vma); + case KFD_MMAP_TYPE_MMIO: + if (!dev) + return -ENODEV; + return kfd_mmio_mmap(dev, process, vma); } return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index 2e7c44955f43..59f8ca4297db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -134,6 +134,7 @@ static struct kfd_gpu_cache_info carrizo_cache_info[] = { #define polaris10_cache_info carrizo_cache_info #define polaris11_cache_info carrizo_cache_info #define polaris12_cache_info carrizo_cache_info +#define vegam_cache_info carrizo_cache_info /* TODO - check & update Vega10 cache details */ #define vega10_cache_info carrizo_cache_info #define raven_cache_info carrizo_cache_info @@ -372,7 +373,7 @@ static int kfd_parse_subtype_iolink(struct crat_subtype_iolink *iolink, if (props->iolink_type == CRAT_IOLINK_TYPE_PCIEXPRESS) props->weight = 20; else if (props->iolink_type == CRAT_IOLINK_TYPE_XGMI) - props->weight = 15; + props->weight = 15 * iolink->num_hops_xgmi; else props->weight = node_distance(id_from, id_to); @@ -652,6 +653,10 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev, pcache_info = polaris12_cache_info; num_of_cache_types = ARRAY_SIZE(polaris12_cache_info); break; + case CHIP_VEGAM: + pcache_info = vegam_cache_info; + num_of_cache_types = ARRAY_SIZE(vegam_cache_info); + break; case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: @@ -1092,6 +1097,7 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size, static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, struct kfd_dev *kdev, + struct kfd_dev *peer_kdev, struct crat_subtype_iolink *sub_type_hdr, uint32_t proximity_domain_from, uint32_t proximity_domain_to) @@ -1110,6 +1116,8 @@ static int kfd_fill_gpu_xgmi_link_to_gpu(int *avail_size, sub_type_hdr->io_interface_type = CRAT_IOLINK_TYPE_XGMI; sub_type_hdr->proximity_domain_from = proximity_domain_from; sub_type_hdr->proximity_domain_to = proximity_domain_to; + sub_type_hdr->num_hops_xgmi = + amdgpu_amdkfd_get_xgmi_hops_count(kdev->kgd, peer_kdev->kgd); return 0; } @@ -1287,7 +1295,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, (char *)sub_type_hdr + sizeof(struct crat_subtype_iolink)); ret = kfd_fill_gpu_xgmi_link_to_gpu( - &avail_size, kdev, + &avail_size, kdev, peer_dev->gpu, (struct crat_subtype_iolink *)sub_type_hdr, proximity_domain, nid); if (ret < 0) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h index 7c3f192fe25f..d54ceebd346b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h @@ -274,7 +274,8 @@ struct crat_subtype_iolink { uint32_t minimum_bandwidth_mbs; uint32_t maximum_bandwidth_mbs; uint32_t recommended_transfer_size; - uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH]; + uint8_t reserved2[CRAT_IOLINK_RESERVED_LENGTH - 1]; + uint8_t num_hops_xgmi; }; /* diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c1e4d44d6137..9d1b026e29e9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -54,6 +54,7 @@ static const struct kfd_device_info kaveri_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -71,6 +72,7 @@ static const struct kfd_device_info carrizo_device_info = { .needs_iommu_device = true, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -87,6 +89,7 @@ static const struct kfd_device_info raven_device_info = { .needs_iommu_device = true, .needs_pci_atomics = true, .num_sdma_engines = 1, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; #endif @@ -105,6 +108,7 @@ static const struct kfd_device_info hawaii_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -121,6 +125,7 @@ static const struct kfd_device_info tonga_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -137,6 +142,7 @@ static const struct kfd_device_info fiji_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -153,6 +159,7 @@ static const struct kfd_device_info fiji_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -170,6 +177,7 @@ static const struct kfd_device_info polaris10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -186,6 +194,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -202,6 +211,7 @@ static const struct kfd_device_info polaris11_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -218,6 +228,24 @@ static const struct kfd_device_info polaris12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = true, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, + .num_sdma_queues_per_engine = 2, +}; + +static const struct kfd_device_info vegam_device_info = { + .asic_family = CHIP_VEGAM, + .max_pasid_bits = 16, + .max_no_of_hqd = 24, + .doorbell_size = 4, + .ih_ring_entry_size = 4 * sizeof(uint32_t), + .event_interrupt_class = &event_interrupt_class_cik, + .num_of_watch_points = 4, + .mqd_size_aligned = MQD_SIZE_ALIGNED, + .supports_cwsr = true, + .needs_iommu_device = false, + .needs_pci_atomics = true, + .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -234,6 +262,7 @@ static const struct kfd_device_info vega10_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -250,6 +279,7 @@ static const struct kfd_device_info vega10_vf_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -266,6 +296,7 @@ static const struct kfd_device_info vega12_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 2, }; @@ -282,6 +313,7 @@ static const struct kfd_device_info vega20_device_info = { .needs_iommu_device = false, .needs_pci_atomics = false, .num_sdma_engines = 2, + .num_xgmi_sdma_engines = 0, .num_sdma_queues_per_engine = 8, }; @@ -355,6 +387,7 @@ static const struct kfd_deviceid supported_devices[] = { { 0x67CF, &polaris10_device_info }, /* Polaris10 */ { 0x67D0, &polaris10_vf_device_info }, /* Polaris10 vf*/ { 0x67DF, &polaris10_device_info }, /* Polaris10 */ + { 0x6FDF, &polaris10_device_info }, /* Polaris10 */ { 0x67E0, &polaris11_device_info }, /* Polaris11 */ { 0x67E1, &polaris11_device_info }, /* Polaris11 */ { 0x67E3, &polaris11_device_info }, /* Polaris11 */ @@ -372,6 +405,9 @@ static const struct kfd_deviceid supported_devices[] = { { 0x6995, &polaris12_device_info }, /* Polaris12 */ { 0x6997, &polaris12_device_info }, /* Polaris12 */ { 0x699F, &polaris12_device_info }, /* Polaris12 */ + { 0x694C, &vegam_device_info }, /* VegaM */ + { 0x694E, &vegam_device_info }, /* VegaM */ + { 0x694F, &vegam_device_info }, /* VegaM */ { 0x6860, &vega10_device_info }, /* Vega10 */ { 0x6861, &vega10_device_info }, /* Vega10 */ { 0x6862, &vega10_device_info }, /* Vega10 */ @@ -462,6 +498,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, kfd->pdev = pdev; kfd->init_complete = false; kfd->kfd2kgd = f2g; + atomic_set(&kfd->compute_profile, 0); mutex_init(&kfd->doorbell_mutex); memset(&kfd->doorbell_available_index, 0, @@ -516,6 +553,13 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, } else kfd->max_proc_per_quantum = hws_max_conc_proc; + /* Allocate global GWS that is shared by all KFD processes */ + if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd, + amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) { + dev_err(kfd_device, "Could not allocate %d gws\n", + amdgpu_amdkfd_get_num_gws(kfd->kgd)); + goto out; + } /* calculate max size of mqds needed for queues */ size = max_num_of_queues_per_device * kfd->device_info->mqd_size_aligned; @@ -539,7 +583,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, false)) { dev_err(kfd_device, "Could not allocate %d bytes\n", size); - goto out; + goto alloc_gtt_mem_failure; } dev_info(kfd_device, "Allocated %d bytes on gart\n", size); @@ -609,6 +653,9 @@ kfd_doorbell_error: kfd_gtt_sa_fini(kfd); kfd_gtt_sa_init_error: amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); +alloc_gtt_mem_failure: + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); dev_err(kfd_device, "device %x:%x NOT added due to errors\n", kfd->pdev->vendor, kfd->pdev->device); @@ -626,6 +673,8 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd) kfd_doorbell_fini(kfd); kfd_gtt_sa_fini(kfd); amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem); + if (hws_gws_support) + amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws); } kfree(kfd); @@ -1036,6 +1085,21 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd) atomic_inc(&kfd->sram_ecc_flag); } +void kfd_inc_compute_active(struct kfd_dev *kfd) +{ + if (atomic_inc_return(&kfd->compute_profile) == 1) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, false); +} + +void kfd_dec_compute_active(struct kfd_dev *kfd) +{ + int count = atomic_dec_return(&kfd->compute_profile); + + if (count == 0) + amdgpu_amdkfd_set_compute_idle(kfd->kgd, true); + WARN_ONCE(count < 0, "Compute profile ref. count error"); +} + #if defined(CONFIG_DEBUG_FS) /* This function will send a package to HIQ to hang the HWS diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index c6c9530e704e..ece35c7a77b5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -60,14 +60,14 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd); static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id); + struct queue *q); static void kfd_process_hw_exception(struct work_struct *work); static inline enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) { - if (type == KFD_QUEUE_TYPE_SDMA) + if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) return KFD_MQD_TYPE_SDMA; return KFD_MQD_TYPE_CP; } @@ -107,12 +107,23 @@ static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm) return dqm->dev->device_info->num_sdma_engines; } +static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines; +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines * dqm->dev->device_info->num_sdma_queues_per_engine; } +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) +{ + return dqm->dev->device_info->num_xgmi_sdma_engines + * dqm->dev->device_info->num_sdma_queues_per_engine; +} + void program_sh_mem_settings(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -133,7 +144,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) * preserve the user mode ABI. */ q->doorbell_id = q->properties.queue_id; - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { /* For SDMA queues on SOC15 with 8-byte doorbell, use static * doorbell assignments based on the engine and queue id. * The doobell index distance between RLC (2*i) and (2*i+1) @@ -174,7 +186,8 @@ static void deallocate_doorbell(struct qcm_process_device *qpd, struct kfd_dev *dev = qpd->dqm->dev; if (!KFD_IS_SOC15(dev->device_info->asic_family) || - q->properties.type == KFD_QUEUE_TYPE_SDMA) + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) return; old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); @@ -289,7 +302,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) retval = create_compute_queue_nocpsch(dqm, q, qpd); - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) retval = create_sdma_queue_nocpsch(dqm, q, qpd); else retval = -EINVAL; @@ -307,6 +321,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's @@ -368,9 +384,7 @@ static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; retval = allocate_hqd(dqm, q); if (retval) @@ -425,16 +439,17 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, int retval; struct mqd_manager *mqd_mgr; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { deallocate_hqd(dqm, q); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); @@ -501,12 +516,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = -ENODEV; goto out_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_unlock; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -529,7 +540,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) } } else if (prev_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); @@ -556,7 +568,8 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) retval = map_queues_cpsch(dqm); else if (q->properties.is_active && (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || - q->properties.type == KFD_QUEUE_TYPE_SDMA)) { + q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { if (WARN(q->process->mm != current->mm, "should only run in user thread")) retval = -EFAULT; @@ -571,27 +584,6 @@ out_unlock: return retval; } -static struct mqd_manager *get_mqd_manager( - struct device_queue_manager *dqm, enum KFD_MQD_TYPE type) -{ - struct mqd_manager *mqd_mgr; - - if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) - return NULL; - - pr_debug("mqd type %d\n", type); - - mqd_mgr = dqm->mqd_mgrs[type]; - if (!mqd_mgr) { - mqd_mgr = mqd_manager_init(type, dqm->dev); - if (!mqd_mgr) - pr_err("mqd manager is NULL"); - dqm->mqd_mgrs[type] = mqd_mgr; - } - - return mqd_mgr; -} - static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, struct qcm_process_device *qpd) { @@ -612,13 +604,8 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_active) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot evict queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = true; q->properties.is_active = false; retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, @@ -717,13 +704,8 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (!q->properties.is_evicted) continue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { /* should not be here */ - pr_err("Cannot restore queue, mqd mgr is NULL\n"); - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; q->properties.is_evicted = false; q->properties.is_active = true; retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, @@ -811,11 +793,15 @@ static int register_process(struct device_queue_manager *dqm, retval = dqm->asic_ops.update_qpd(dqm, qpd); - if (dqm->processes_count++ == 0) - amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false); + dqm->processes_count++; dqm_unlock(dqm); + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + kfd_inc_compute_active(dqm->dev); + return retval; } @@ -835,9 +821,7 @@ static int unregister_process(struct device_queue_manager *dqm, if (qpd == cur->qpd) { list_del(&cur->list); kfree(cur); - if (--dqm->processes_count == 0) - amdgpu_amdkfd_set_compute_idle( - dqm->dev->kgd, true); + dqm->processes_count--; goto out; } } @@ -845,6 +829,13 @@ static int unregister_process(struct device_queue_manager *dqm, retval = 1; out: dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (!retval) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -880,6 +871,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -891,7 +883,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) } dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; return 0; } @@ -922,26 +915,56 @@ static int stop_nocpsch(struct device_queue_manager *dqm) } static int allocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int *sdma_queue_id) + struct queue *q) { int bit; - if (dqm->sdma_bitmap == 0) - return -ENOMEM; + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (dqm->sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->sdma_bitmap); + dqm->sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + q->properties.sdma_engine_id = q->sdma_id % + get_num_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_sdma_engines(dqm); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (dqm->xgmi_sdma_bitmap == 0) + return -ENOMEM; + bit = __ffs64(dqm->xgmi_sdma_bitmap); + dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); + q->sdma_id = bit; + /* sdma_engine_id is sdma id including + * both PCIe-optimized SDMAs and XGMI- + * optimized SDMAs. The calculation below + * assumes the first N engines are always + * PCIe-optimized ones + */ + q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + + q->sdma_id % get_num_xgmi_sdma_engines(dqm); + q->properties.sdma_queue_id = q->sdma_id / + get_num_xgmi_sdma_engines(dqm); + } - bit = ffs(dqm->sdma_bitmap) - 1; - dqm->sdma_bitmap &= ~(1 << bit); - *sdma_queue_id = bit; + pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); + pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); return 0; } static void deallocate_sdma_queue(struct device_queue_manager *dqm, - unsigned int sdma_queue_id) + struct queue *q) { - if (sdma_queue_id >= get_num_sdma_queues(dqm)) - return; - dqm->sdma_bitmap |= (1 << sdma_queue_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { + if (q->sdma_id >= get_num_sdma_queues(dqm)) + return; + dqm->sdma_bitmap |= (1ULL << q->sdma_id); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) + return; + dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); + } } static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, @@ -951,25 +974,16 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA); - if (!mqd_mgr) - return -ENOMEM; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; - retval = allocate_sdma_queue(dqm, &q->sdma_id); + retval = allocate_sdma_queue(dqm, q); if (retval) return retval; - q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm); - retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - pr_debug("SDMA id is: %d\n", q->sdma_id); - pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); - pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, &q->gart_mqd_addr, &q->properties); @@ -988,7 +1002,7 @@ out_uninit_mqd: out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); return retval; } @@ -1046,8 +1060,10 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(&dqm->queues); dqm->queue_count = dqm->processes_count = 0; dqm->sdma_queue_count = 0; + dqm->xgmi_sdma_queue_count = 0; dqm->active_runlist = false; - dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1; + dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; + dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); @@ -1162,38 +1178,26 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, int retval; struct mqd_manager *mqd_mgr; - retval = 0; - - dqm_lock(dqm); - if (dqm->total_queue_count >= max_num_of_queues_per_device) { pr_warn("Can't create new usermode queue because %d queues were already created\n", dqm->total_queue_count); retval = -EPERM; - goto out_unlock; + goto out; } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - retval = allocate_sdma_queue(dqm, &q->sdma_id); + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + retval = allocate_sdma_queue(dqm, q); if (retval) - goto out_unlock; - q->properties.sdma_queue_id = - q->sdma_id / get_num_sdma_engines(dqm); - q->properties.sdma_engine_id = - q->sdma_id % get_num_sdma_engines(dqm); + goto out; } retval = allocate_doorbell(qpd, q); if (retval) goto out_deallocate_sdma_queue; - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - - if (!mqd_mgr) { - retval = -ENOMEM; - goto out_deallocate_doorbell; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; /* * Eviction state logic: we only mark active queues as evicted * to avoid the overhead of restoring inactive queues later @@ -1202,9 +1206,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, q->properties.is_evicted = (q->properties.queue_size > 0 && q->properties.queue_percent > 0 && q->properties.queue_address != 0); - dqm->asic_ops.init_sdma_vm(dqm, q, qpd); - q->properties.tba_addr = qpd->tba_addr; q->properties.tma_addr = qpd->tma_addr; retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, @@ -1212,6 +1214,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (retval) goto out_deallocate_doorbell; + dqm_lock(dqm); + list_add(&q->list, &qpd->queues_list); qpd->queue_count++; if (q->properties.is_active) { @@ -1222,6 +1226,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -1237,11 +1243,10 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, out_deallocate_doorbell: deallocate_doorbell(qpd, q); out_deallocate_sdma_queue: - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - deallocate_sdma_queue(dqm, q->sdma_id); -out_unlock: - dqm_unlock(dqm); - + if (q->properties.type == KFD_QUEUE_TYPE_SDMA || + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + deallocate_sdma_queue(dqm, q); +out: return retval; } @@ -1269,12 +1274,18 @@ int amdkfd_fence_wait_timeout(unsigned int *fence_addr, return 0; } -static int unmap_sdma_queues(struct device_queue_manager *dqm, - unsigned int sdma_engine) +static int unmap_sdma_queues(struct device_queue_manager *dqm) { - return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, - KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, - sdma_engine); + int i, retval = 0; + + for (i = 0; i < dqm->dev->device_info->num_sdma_engines + + dqm->dev->device_info->num_xgmi_sdma_engines; i++) { + retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, + KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); + if (retval) + return retval; + } + return retval; } /* dqm->lock mutex has to be locked before calling this function */ @@ -1310,13 +1321,11 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (!dqm->active_runlist) return retval; - pr_debug("Before destroying queues, sdma queue count is : %u\n", - dqm->sdma_queue_count); + pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", + dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); - if (dqm->sdma_queue_count > 0) { - unmap_sdma_queues(dqm, 0); - unmap_sdma_queues(dqm, 1); - } + if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) + unmap_sdma_queues(dqm); retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, filter, filter_param, false, 0); @@ -1380,18 +1389,17 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto failed; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; deallocate_doorbell(qpd, q); if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } list_del(&q->list); @@ -1404,8 +1412,6 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = true; } - mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); - /* * Unconditionally decrement this counter, regardless of the queue's * type @@ -1416,9 +1422,11 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, dqm_unlock(dqm); + /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ + mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); + return retval; -failed: failed_try_destroy_debugged_queue: dqm_unlock(dqm); @@ -1521,6 +1529,7 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, struct queue *q, *next; struct device_process_node *cur, *next_dpn; int retval = 0; + bool found = false; dqm_lock(dqm); @@ -1539,11 +1548,19 @@ static int process_termination_nocpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + found = true; break; } } dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + return retval; } @@ -1564,11 +1581,7 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE); - if (!mqd_mgr) { - r = -ENOMEM; - goto dqm_unlock; - } + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; if (!mqd_mgr->get_wave_state) { r = -EINVAL; @@ -1593,6 +1606,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, struct device_process_node *cur, *next_dpn; enum kfd_unmap_queues_filter filter = KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; + bool found = false; retval = 0; @@ -1611,7 +1625,10 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_for_each_entry(q, &qpd->queues_list, list) { if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { dqm->sdma_queue_count--; - deallocate_sdma_queue(dqm, q->sdma_id); + deallocate_sdma_queue(dqm, q); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + dqm->xgmi_sdma_queue_count--; + deallocate_sdma_queue(dqm, q); } if (q->properties.is_active) @@ -1626,6 +1643,7 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, list_del(&cur->list); kfree(cur); dqm->processes_count--; + found = true; break; } } @@ -1637,21 +1655,68 @@ static int process_termination_cpsch(struct device_queue_manager *dqm, qpd->reset_wavefronts = false; } - /* lastly, free mqd resources */ + dqm_unlock(dqm); + + /* Outside the DQM lock because under the DQM lock we can't do + * reclaim or take other locks that others hold while reclaiming. + */ + if (found) + kfd_dec_compute_active(dqm->dev); + + /* Lastly, free mqd resources. + * Do uninit_mqd() after dqm_unlock to avoid circular locking. + */ list_for_each_entry_safe(q, next, &qpd->queues_list, list) { - mqd_mgr = dqm->ops.get_mqd_manager(dqm, - get_mqd_type_from_queue_type(q->properties.type)); - if (!mqd_mgr) { - retval = -ENOMEM; - goto out; - } + mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( + q->properties.type)]; list_del(&q->list); qpd->queue_count--; mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); } -out: - dqm_unlock(dqm); + return retval; +} + +static int init_mqd_managers(struct device_queue_manager *dqm) +{ + int i, j; + struct mqd_manager *mqd_mgr; + + for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { + mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); + if (!mqd_mgr) { + pr_err("mqd manager [%d] initialization failed\n", i); + goto out_free; + } + dqm->mqd_mgrs[i] = mqd_mgr; + } + + return 0; + +out_free: + for (j = 0; j < i; j++) { + kfree(dqm->mqd_mgrs[j]); + dqm->mqd_mgrs[j] = NULL; + } + + return -ENOMEM; +} + +/* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ +static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) +{ + int retval; + struct kfd_dev *dev = dqm->dev; + struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; + uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * + dev->device_info->num_sdma_engines * + dev->device_info->num_sdma_queues_per_engine + + dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size, + &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), + (void *)&(mem_obj->cpu_ptr), true); + return retval; } @@ -1692,7 +1757,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.stop = stop_cpsch; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.uninitialize = uninitialize; @@ -1712,7 +1776,6 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; - dqm->ops.get_mqd_manager = get_mqd_manager; dqm->ops.register_process = register_process; dqm->ops.unregister_process = unregister_process; dqm->ops.initialize = initialize_nocpsch; @@ -1748,6 +1811,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: device_queue_manager_init_vi_tonga(&dqm->asic_ops); break; @@ -1763,6 +1827,14 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) goto out_free; } + if (init_mqd_managers(dqm)) + goto out_free; + + if (allocate_hiq_sdma_mqd(dqm)) { + pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); + goto out_free; + } + if (!dqm->ops.initialize(dqm)) return dqm; @@ -1771,9 +1843,17 @@ out_free: return NULL; } +void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd) +{ + WARN(!mqd, "No hiq sdma mqd trunk to free"); + + amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); +} + void device_queue_manager_uninit(struct device_queue_manager *dqm) { dqm->ops.uninitialize(dqm); + deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); kfree(dqm); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 70e38a2e23b9..88b4c007696e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -48,8 +48,6 @@ struct device_process_node { * * @update_queue: Queue update routine. * - * @get_mqd_manager: Returns the mqd manager according to the mqd type. - * * @exeute_queues: Dispatches the queues list to the H/W. * * @register_process: This routine associates a specific process with device. @@ -97,10 +95,6 @@ struct device_queue_manager_ops { int (*update_queue)(struct device_queue_manager *dqm, struct queue *q); - struct mqd_manager * (*get_mqd_manager) - (struct device_queue_manager *dqm, - enum KFD_MQD_TYPE type); - int (*register_process)(struct device_queue_manager *dqm, struct qcm_process_device *qpd); @@ -158,6 +152,8 @@ struct device_queue_manager_asic_ops { void (*init_sdma_vm)(struct device_queue_manager *dqm, struct queue *q, struct qcm_process_device *qpd); + struct mqd_manager * (*mqd_manager_init)(enum KFD_MQD_TYPE type, + struct kfd_dev *dev); }; /** @@ -185,10 +181,12 @@ struct device_queue_manager { unsigned int processes_count; unsigned int queue_count; unsigned int sdma_queue_count; + unsigned int xgmi_sdma_queue_count; unsigned int total_queue_count; unsigned int next_pipe_to_allocate; unsigned int *allocated_queues; - unsigned int sdma_bitmap; + uint64_t sdma_bitmap; + uint64_t xgmi_sdma_bitmap; unsigned int vmid_bitmap; uint64_t pipelines_addr; struct kfd_mem_obj *pipeline_mem; @@ -201,6 +199,7 @@ struct device_queue_manager { /* hw exception */ bool is_hws_hang; struct work_struct hw_exception_work; + struct kfd_mem_obj hiq_sdma_mqd; }; void device_queue_manager_init_cik( @@ -219,6 +218,7 @@ unsigned int get_queues_num(struct device_queue_manager *dqm); unsigned int get_queues_per_pipe(struct device_queue_manager *dqm); unsigned int get_pipes_per_mec(struct device_queue_manager *dqm); unsigned int get_num_sdma_queues(struct device_queue_manager *dqm); +unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm); static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c index aed4c21417bf..0d26506798cf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c @@ -48,6 +48,7 @@ void device_queue_manager_init_cik( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_cik; } void device_queue_manager_init_cik_hawaii( @@ -56,6 +57,7 @@ void device_queue_manager_init_cik_hawaii( asic_ops->set_cache_memory_policy = set_cache_memory_policy_cik; asic_ops->update_qpd = update_qpd_cik_hawaii; asic_ops->init_sdma_vm = init_sdma_vm_hawaii; + asic_ops->mqd_manager_init = mqd_manager_init_cik_hawaii; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c index 417515332c35..e9fe39382371 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c @@ -37,6 +37,7 @@ void device_queue_manager_init_v9( { asic_ops->update_qpd = update_qpd_v9; asic_ops->init_sdma_vm = init_sdma_vm_v9; + asic_ops->mqd_manager_init = mqd_manager_init_v9; } static uint32_t compute_sh_mem_bases_64bit(struct kfd_process_device *pdd) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c index c3a5dcfe877a..3a7cb2f88366 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_vi.c @@ -54,6 +54,7 @@ void device_queue_manager_init_vi( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; asic_ops->update_qpd = update_qpd_vi; asic_ops->init_sdma_vm = init_sdma_vm; + asic_ops->mqd_manager_init = mqd_manager_init_vi; } void device_queue_manager_init_vi_tonga( @@ -62,6 +63,7 @@ void device_queue_manager_init_vi_tonga( asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; asic_ops->update_qpd = update_qpd_vi_tonga; asic_ops->init_sdma_vm = init_sdma_vm_tonga; + asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga; } static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6e1d41c5bf86..d674d4b3340f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -983,7 +983,7 @@ void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid, return; /* Presumably process exited. */ memset(&memory_exception_data, 0, sizeof(memory_exception_data)); memory_exception_data.gpu_id = dev->id; - memory_exception_data.failure.imprecise = 1; + memory_exception_data.failure.imprecise = true; /* Set failure reason */ if (info) { memory_exception_data.va = (info->page_addr) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index 213ea5454d11..22a8e88b6a67 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -398,6 +398,7 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); break; case CHIP_VEGA10: @@ -435,5 +436,3 @@ int kfd_init_apertures(struct kfd_process *process) return 0; } - - diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index f1596881f20a..1cc03b3ddbb9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -58,9 +58,10 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->nop_packet = nop.u32all; switch (type) { case KFD_QUEUE_TYPE_DIQ: + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_DIQ]; + break; case KFD_QUEUE_TYPE_HIQ: - kq->mqd_mgr = dev->dqm->ops.get_mqd_manager(dev->dqm, - KFD_MQD_TYPE_HIQ); + kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; break; default: pr_err("Invalid queue type %d\n", type); @@ -314,6 +315,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: kernel_queue_init_vi(&kq->ops_asic_specific); break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c index 33830b1a5a54..07f02f8e4fe4 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c @@ -153,14 +153,13 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; packet->bitfields2.engine_sel = engine_sel__mes_map_queues__compute_vi; + packet->bitfields2.gws_control_queue = q->gws ? 1 : 0; packet->bitfields2.queue_type = queue_type__mes_map_queues__normal_compute_vi; @@ -175,6 +174,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -221,6 +221,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c index bf20c6d32ef3..2adaf40027eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c @@ -190,8 +190,6 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, packet->header.u32All = pm_build_pm4_header(IT_MAP_QUEUES, sizeof(struct pm4_mes_map_queues)); - packet->bitfields2.alloc_format = - alloc_format__mes_map_queues__one_per_pipe_vi; packet->bitfields2.num_queues = 1; packet->bitfields2.queue_sel = queue_sel__mes_map_queues__map_to_hws_determined_queue_slots_vi; @@ -212,6 +210,7 @@ static int pm_map_queues_vi(struct packet_manager *pm, uint32_t *buffer, queue_type__mes_map_queues__debug_interface_queue_vi; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = q->properties.sdma_engine_id + engine_sel__mes_map_queues__sdma0_vi; use_static = false; /* no static queues under SDMA */ @@ -258,6 +257,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer, engine_sel__mes_unmap_queues__compute; break; case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: packet->bitfields2.engine_sel = engine_sel__mes_unmap_queues__sdma0 + sdma_engine; break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c index aed9b9b82213..9307811bc427 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c @@ -23,34 +23,54 @@ #include "kfd_mqd_manager.h" #include "amdgpu_amdkfd.h" +#include "kfd_device_queue_manager.h" -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev) +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev) { - switch (dev->device_info->asic_family) { - case CHIP_KAVERI: - return mqd_manager_init_cik(type, dev); - case CHIP_HAWAII: - return mqd_manager_init_cik_hawaii(type, dev); - case CHIP_CARRIZO: - return mqd_manager_init_vi(type, dev); - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - return mqd_manager_init_vi_tonga(type, dev); - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - return mqd_manager_init_v9(type, dev); - default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - } + struct kfd_mem_obj *mqd_mem_obj = NULL; + + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + mqd_mem_obj->gtt_mem = dev->dqm->hiq_sdma_mqd.gtt_mem; + mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr; + mqd_mem_obj->cpu_ptr = dev->dqm->hiq_sdma_mqd.cpu_ptr; + + return mqd_mem_obj; +} + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj = NULL; + uint64_t offset; - return NULL; + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); + if (!mqd_mem_obj) + return NULL; + + offset = (q->sdma_engine_id * + dev->device_info->num_sdma_queues_per_engine + + q->sdma_queue_id) * + dev->dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size; + + offset += dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size; + + mqd_mem_obj->gtt_mem = (void *)((uint64_t)dev->dqm->hiq_sdma_mqd.gtt_mem + + offset); + mqd_mem_obj->gpu_addr = dev->dqm->hiq_sdma_mqd.gpu_addr + offset; + mqd_mem_obj->cpu_ptr = (uint32_t *)((uint64_t) + dev->dqm->hiq_sdma_mqd.cpu_ptr + offset); + + return mqd_mem_obj; +} + +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj) +{ + WARN_ON(!mqd_mem_obj->gtt_mem); + kfree(mqd_mem_obj); } void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h index f8261313ae7b..56af256a191b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.h @@ -99,8 +99,16 @@ struct mqd_manager { struct mutex mqd_mutex; struct kfd_dev *dev; + uint32_t mqd_size; }; +struct kfd_mem_obj *allocate_hiq_mqd(struct kfd_dev *dev); + +struct kfd_mem_obj *allocate_sdma_mqd(struct kfd_dev *dev, + struct queue_properties *q); +void uninit_mqd_hiq_sdma(struct mqd_manager *mm, void *mqd, + struct kfd_mem_obj *mqd_mem_obj); + void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm, const uint32_t *cu_mask, uint32_t cu_mask_count, uint32_t *se_mask); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index ae90a99909ef..6e8509ec29d9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -66,6 +66,22 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct cik_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -73,11 +89,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, uint64_t addr; struct cik_mqd *m; int retval; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -136,12 +151,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct cik_sdma_rlc_registers *m; + struct kfd_dev *dev = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct cik_sdma_rlc_registers), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct cik_sdma_rlc_registers *) (*mqd_mem_obj)->cpu_ptr; @@ -163,11 +176,6 @@ static void uninit_mqd(struct mqd_manager *mm, void *mqd, kfd_gtt_sa_free(mm->dev, mqd_mem_obj); } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} static int load_mqd(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, @@ -400,28 +408,43 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct cik_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct cik_sdma_rlc_registers); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 9dbba609450e..4750338199b6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -67,33 +67,54 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } -static int init_mqd(struct mqd_manager *mm, void **mqd, - struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, - struct queue_properties *q) +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) { int retval; - uint64_t addr; - struct v9_mqd *m; - struct kfd_dev *kfd = mm->dev; + struct kfd_mem_obj *mqd_mem_obj = NULL; + + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); /* From V9, for CWSR, the control stack is located on the next page * boundary after the mqd, we will use the gtt allocation function * instead of sub-allocation function. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - *mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); - if (!*mqd_mem_obj) - return -ENOMEM; + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + if (!mqd_mem_obj) + return NULL; retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, ALIGN(q->ctl_stack_size, PAGE_SIZE) + ALIGN(sizeof(struct v9_mqd), PAGE_SIZE), - &((*mqd_mem_obj)->gtt_mem), - &((*mqd_mem_obj)->gpu_addr), - (void *)&((*mqd_mem_obj)->cpu_ptr), true); - } else - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct v9_mqd), - mqd_mem_obj); - if (retval != 0) + &(mqd_mem_obj->gtt_mem), + &(mqd_mem_obj->gpu_addr), + (void *)&(mqd_mem_obj->cpu_ptr), true); + } else { + retval = kfd_gtt_sa_allocate(kfd, sizeof(struct v9_mqd), + &mqd_mem_obj); + } + + if (retval) { + kfree(mqd_mem_obj); + return NULL; + } + + return mqd_mem_obj; + +} + +static int init_mqd(struct mqd_manager *mm, void **mqd, + struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, + struct queue_properties *q) +{ + int retval; + uint64_t addr; + struct v9_mqd *m; + struct kfd_dev *kfd = mm->dev; + + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct v9_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -328,13 +349,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct v9_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct v9_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct v9_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -350,12 +368,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) @@ -459,28 +471,43 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct v9_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct v9_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 6469b3456f00..b550dea9b10a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -68,6 +68,21 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, + struct queue_properties *q) +{ + struct kfd_mem_obj *mqd_mem_obj; + + if (q->type == KFD_QUEUE_TYPE_HIQ) + return allocate_hiq_mqd(kfd); + + if (kfd_gtt_sa_allocate(kfd, sizeof(struct vi_mqd), + &mqd_mem_obj)) + return NULL; + + return mqd_mem_obj; +} + static int init_mqd(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -75,10 +90,10 @@ static int init_mqd(struct mqd_manager *mm, void **mqd, int retval; uint64_t addr; struct vi_mqd *m; + struct kfd_dev *kfd = mm->dev; - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct vi_mqd), - mqd_mem_obj); - if (retval != 0) + *mqd_mem_obj = allocate_mqd(kfd, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -329,13 +344,10 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, { int retval; struct vi_sdma_mqd *m; + struct kfd_dev *dev = mm->dev; - - retval = kfd_gtt_sa_allocate(mm->dev, - sizeof(struct vi_sdma_mqd), - mqd_mem_obj); - - if (retval != 0) + *mqd_mem_obj = allocate_sdma_mqd(dev, q); + if (!*mqd_mem_obj) return -ENOMEM; m = (struct vi_sdma_mqd *) (*mqd_mem_obj)->cpu_ptr; @@ -343,7 +355,7 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, memset(m, 0, sizeof(struct vi_sdma_mqd)); *mqd = m; - if (gart_addr != NULL) + if (gart_addr) *gart_addr = (*mqd_mem_obj)->gpu_addr; retval = mm->update_mqd(mm, m, q); @@ -351,12 +363,6 @@ static int init_mqd_sdma(struct mqd_manager *mm, void **mqd, return retval; } -static void uninit_mqd_sdma(struct mqd_manager *mm, void *mqd, - struct kfd_mem_obj *mqd_mem_obj) -{ - kfd_gtt_sa_free(mm->dev, mqd_mem_obj); -} - static int load_mqd_sdma(struct mqd_manager *mm, void *mqd, uint32_t pipe_id, uint32_t queue_id, struct queue_properties *p, struct mm_struct *mms) @@ -459,28 +465,43 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->get_wave_state = get_wave_state; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_HIQ: mqd->init_mqd = init_mqd_hiq; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; + mqd->load_mqd = load_mqd; + mqd->update_mqd = update_mqd_hiq; + mqd->destroy_mqd = destroy_mqd; + mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); +#if defined(CONFIG_DEBUG_FS) + mqd->debugfs_show_mqd = debugfs_show_mqd; +#endif + break; + case KFD_MQD_TYPE_DIQ: + mqd->init_mqd = init_mqd_hiq; mqd->uninit_mqd = uninit_mqd; mqd->load_mqd = load_mqd; mqd->update_mqd = update_mqd_hiq; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; + mqd->mqd_size = sizeof(struct vi_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd; #endif break; case KFD_MQD_TYPE_SDMA: mqd->init_mqd = init_mqd_sdma; - mqd->uninit_mqd = uninit_mqd_sdma; + mqd->uninit_mqd = uninit_mqd_hiq_sdma; mqd->load_mqd = load_mqd_sdma; mqd->update_mqd = update_mqd_sdma; mqd->destroy_mqd = destroy_mqd_sdma; mqd->is_occupied = is_occupied_sdma; + mqd->mqd_size = sizeof(struct vi_sdma_mqd); #if defined(CONFIG_DEBUG_FS) mqd->debugfs_show_mqd = debugfs_show_mqd_sdma; #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 045a229436a0..808194663a7d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -48,7 +48,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm, process_count = pm->dqm->processes_count; queue_count = pm->dqm->queue_count; - compute_queue_count = queue_count - pm->dqm->sdma_queue_count; + compute_queue_count = queue_count - pm->dqm->sdma_queue_count - + pm->dqm->xgmi_sdma_queue_count; /* check if there is over subscription * Note: the arbitration between the number of VMIDs and @@ -227,6 +228,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: pm->pmf = &kfd_vi_pm_funcs; break; case CHIP_VEGA10: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h index f2bcf5c092ea..49ab66b703fa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h @@ -176,8 +176,7 @@ struct pm4_mes_map_process { union { struct { - uint32_t num_gws:6; - uint32_t reserved7:1; + uint32_t num_gws:7; uint32_t sdma_enable:1; uint32_t num_oac:4; uint32_t reserved8:4; @@ -255,11 +254,6 @@ enum mes_map_queues_queue_type_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_enum { engine_sel__mes_map_queues__compute_vi = 0, engine_sel__mes_map_queues__sdma0_vi = 2, @@ -277,9 +271,11 @@ struct pm4_mes_map_queues { struct { uint32_t reserved1:4; enum mes_map_queues_queue_sel_enum queue_sel:2; - uint32_t reserved2:15; + uint32_t reserved5:6; + uint32_t gws_control_queue:1; + uint32_t reserved2:8; enum mes_map_queues_queue_type_enum queue_type:3; - enum mes_map_queues_alloc_format_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h index 7c8d9b357749..5466cfe1c3cc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_vi.h @@ -216,11 +216,6 @@ enum mes_map_queues_queue_type_vi_enum { queue_type__mes_map_queues__low_latency_static_queue_vi = 3 }; -enum mes_map_queues_alloc_format_vi_enum { - alloc_format__mes_map_queues__one_per_pipe_vi = 0, -alloc_format__mes_map_queues__all_on_one_pipe_vi = 1 -}; - enum mes_map_queues_engine_sel_vi_enum { engine_sel__mes_map_queues__compute_vi = 0, engine_sel__mes_map_queues__sdma0_vi = 2, @@ -240,7 +235,7 @@ struct pm4_mes_map_queues { enum mes_map_queues_queue_sel_vi_enum queue_sel:2; uint32_t reserved2:15; enum mes_map_queues_queue_type_vi_enum queue_type:3; - enum mes_map_queues_alloc_format_vi_enum alloc_format:2; + uint32_t reserved3:2; enum mes_map_queues_engine_sel_vi_enum engine_sel:3; uint32_t num_queues:3; } bitfields2; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 9e0230965675..b61dc53f42d2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -59,6 +59,7 @@ #define KFD_MMAP_TYPE_DOORBELL (0x3ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_EVENTS (0x2ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_TYPE_RESERVED_MEM (0x1ULL << KFD_MMAP_TYPE_SHIFT) +#define KFD_MMAP_TYPE_MMIO (0x0ULL << KFD_MMAP_TYPE_SHIFT) #define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT) #define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \ @@ -160,6 +161,11 @@ extern int noretry; */ extern int halt_if_hws_hang; +/* + * Whether MEC FW support GWS barriers + */ +extern bool hws_gws_support; + enum cache_policy { cache_policy_coherent, cache_policy_noncoherent @@ -188,6 +194,7 @@ struct kfd_device_info { bool needs_iommu_device; bool needs_pci_atomics; unsigned int num_sdma_engines; + unsigned int num_xgmi_sdma_engines; unsigned int num_sdma_queues_per_engine; }; @@ -258,7 +265,7 @@ struct kfd_dev { bool interrupts_active; /* Debug manager */ - struct kfd_dbgmgr *dbgmgr; + struct kfd_dbgmgr *dbgmgr; /* Firmware versions */ uint16_t mec_fw_version; @@ -279,6 +286,12 @@ struct kfd_dev { /* SRAM ECC flag */ atomic_t sram_ecc_flag; + + /* Compute Profile ref. count */ + atomic_t compute_profile; + + /* Global GWS resource shared b/t processes*/ + void *gws; }; enum kfd_mempool { @@ -326,7 +339,8 @@ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, - KFD_QUEUE_TYPE_DIQ + KFD_QUEUE_TYPE_DIQ, + KFD_QUEUE_TYPE_SDMA_XGMI }; enum kfd_queue_format { @@ -441,6 +455,9 @@ struct queue_properties { * * @device: The kfd device that created this queue. * + * @gws: Pointing to gws kgd_mem if this is a gws control queue; NULL + * otherwise. + * * This structure represents user mode compute queues. * It contains all the necessary data to handle such queues. * @@ -462,6 +479,7 @@ struct queue { struct kfd_process *process; struct kfd_dev *device; + void *gws; }; /* @@ -472,6 +490,7 @@ enum KFD_MQD_TYPE { KFD_MQD_TYPE_HIQ, /* for hiq */ KFD_MQD_TYPE_CP, /* for cp queues and diq */ KFD_MQD_TYPE_SDMA, /* for sdma queues */ + KFD_MQD_TYPE_DIQ, /* for diq */ KFD_MQD_TYPE_MAX }; @@ -816,8 +835,6 @@ void uninit_queue(struct queue *q); void print_queue_properties(struct queue_properties *q); void print_queue(struct queue *q); -struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, - struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, struct kfd_dev *dev); struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, @@ -856,6 +873,8 @@ int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws); struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_get_wave_state(struct process_queue_manager *pqm, @@ -978,6 +997,10 @@ int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p); bool kfd_is_locked(void); +/* Compute profile */ +void kfd_inc_compute_active(struct kfd_dev *dev); +void kfd_dec_compute_active(struct kfd_dev *dev); + /* Debugfs */ #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index fcaaf93681ac..da0958625861 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -26,6 +26,7 @@ #include "kfd_device_queue_manager.h" #include "kfd_priv.h" #include "kfd_kernel_queue.h" +#include "amdgpu_amdkfd.h" static inline struct process_queue_node *get_queue_by_qid( struct process_queue_manager *pqm, unsigned int qid) @@ -74,6 +75,55 @@ void kfd_process_dequeue_from_device(struct kfd_process_device *pdd) pdd->already_dequeued = true; } +int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid, + void *gws) +{ + struct kfd_dev *dev = NULL; + struct process_queue_node *pqn; + struct kfd_process_device *pdd; + struct kgd_mem *mem = NULL; + int ret; + + pqn = get_queue_by_qid(pqm, qid); + if (!pqn) { + pr_err("Queue id does not match any known queue\n"); + return -EINVAL; + } + + if (pqn->q) + dev = pqn->q->device; + if (WARN_ON(!dev)) + return -ENODEV; + + pdd = kfd_get_process_device_data(dev, pqm->process); + if (!pdd) { + pr_err("Process device data doesn't exist\n"); + return -EINVAL; + } + + /* Only allow one queue per process can have GWS assigned */ + if (gws && pdd->qpd.num_gws) + return -EBUSY; + + if (!gws && pdd->qpd.num_gws == 0) + return -EINVAL; + + if (gws) + ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info, + gws, &mem); + else + ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info, + pqn->q->gws); + if (unlikely(ret)) + return ret; + + pqn->q->gws = mem; + pdd->qpd.num_gws = gws ? amdgpu_amdkfd_get_num_gws(dev->kgd) : 0; + + return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm, + pqn->q); +} + void kfd_process_dequeue_from_all_devices(struct kfd_process *p) { struct kfd_process_device *pdd; @@ -186,8 +236,13 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: - if (dev->dqm->queue_count >= get_num_sdma_queues(dev->dqm)) { - pr_err("Over-subscription is not allowed for SDMA.\n"); + case KFD_QUEUE_TYPE_SDMA_XGMI: + if ((type == KFD_QUEUE_TYPE_SDMA && dev->dqm->sdma_queue_count + >= get_num_sdma_queues(dev->dqm)) || + (type == KFD_QUEUE_TYPE_SDMA_XGMI && + dev->dqm->xgmi_sdma_queue_count + >= get_num_xgmi_sdma_queues(dev->dqm))) { + pr_debug("Over-subscription is not allowed for SDMA.\n"); retval = -EPERM; goto err_create_queue; } @@ -325,6 +380,13 @@ int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid) if (retval != -ETIME) goto err_destroy_queue; } + + if (pqn->q->gws) { + amdgpu_amdkfd_remove_gws_from_process(pqm->process->kgd_process_info, + pqn->q->gws); + pdd->qpd.num_gws = 0; + } + kfree(pqn->q->properties.cu_mask); pqn->q->properties.cu_mask = NULL; uninit_queue(pqn->q); @@ -446,6 +508,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q = pqn->q; switch (q->properties.type) { case KFD_QUEUE_TYPE_SDMA: + case KFD_QUEUE_TYPE_SDMA_XGMI: seq_printf(m, " SDMA queue on device %x\n", q->device->id); mqd_type = KFD_MQD_TYPE_SDMA; @@ -461,8 +524,7 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) q->properties.type, q->device->id); continue; } - mqd_mgr = q->device->dqm->ops.get_mqd_manager( - q->device->dqm, mqd_type); + mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type]; } else if (pqn->kq) { q = pqn->kq->queue; mqd_mgr = pqn->kq->mqd_mgr; @@ -470,7 +532,6 @@ int pqm_debugfs_mqds(struct seq_file *m, void *data) case KFD_QUEUE_TYPE_DIQ: seq_printf(m, " DIQ on device %x\n", pqn->kq->dev->id); - mqd_type = KFD_MQD_TYPE_HIQ; break; default: seq_printf(m, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 2cb09e088dce..d241a8672599 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -454,6 +454,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.lds_size_in_kb); sysfs_show_32bit_prop(buffer, "gds_size_in_kb", dev->node_props.gds_size_in_kb); + sysfs_show_32bit_prop(buffer, "num_gws", + dev->node_props.num_gws); sysfs_show_32bit_prop(buffer, "wave_front_size", dev->node_props.wave_front_size); sysfs_show_32bit_prop(buffer, "array_count", @@ -476,6 +478,10 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.drm_render_minor); sysfs_show_64bit_prop(buffer, "hive_id", dev->node_props.hive_id); + sysfs_show_32bit_prop(buffer, "num_sdma_engines", + dev->node_props.num_sdma_engines); + sysfs_show_32bit_prop(buffer, "num_sdma_xgmi_engines", + dev->node_props.num_sdma_xgmi_engines); if (dev->gpu) { log_max_watch_addr = @@ -1078,8 +1084,9 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu) local_mem_info.local_mem_size_public; buf[0] = gpu->pdev->devfn; - buf[1] = gpu->pdev->subsystem_vendor; - buf[2] = gpu->pdev->subsystem_device; + buf[1] = gpu->pdev->subsystem_vendor | + (gpu->pdev->subsystem_device << 16); + buf[2] = pci_domain_nr(gpu->pdev->bus); buf[3] = gpu->pdev->device; buf[4] = gpu->pdev->bus->number; buf[5] = lower_32_bits(local_mem_size); @@ -1272,8 +1279,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.vendor_id = gpu->pdev->vendor; dev->node_props.device_id = gpu->pdev->device; - dev->node_props.location_id = PCI_DEVID(gpu->pdev->bus->number, - gpu->pdev->devfn); + dev->node_props.location_id = pci_dev_id(gpu->pdev); dev->node_props.max_engine_clk_fcompute = amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd); dev->node_props.max_engine_clk_ccompute = @@ -1282,6 +1288,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) gpu->shared_resources.drm_render_minor; dev->node_props.hive_id = gpu->hive_id; + dev->node_props.num_sdma_engines = gpu->device_info->num_sdma_engines; + dev->node_props.num_sdma_xgmi_engines = + gpu->device_info->num_xgmi_sdma_engines; + dev->node_props.num_gws = (hws_gws_support && + dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ? + amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0; kfd_fill_mem_clk_max_info(dev); kfd_fill_iolink_non_crat_info(dev); @@ -1299,6 +1311,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu) case CHIP_POLARIS10: case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: pr_debug("Adding doorbell packet type capability\n"); dev->node_props.capability |= ((HSA_CAP_DOORBELL_TYPE_1_0 << HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT) & diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 84710cfd23c2..276354aa0fcc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -65,6 +65,7 @@ struct kfd_node_properties { uint32_t max_waves_per_simd; uint32_t lds_size_in_kb; uint32_t gds_size_in_kb; + uint32_t num_gws; uint32_t wave_front_size; uint32_t array_count; uint32_t simd_arrays_per_engine; @@ -78,6 +79,8 @@ struct kfd_node_properties { uint32_t max_engine_clk_fcompute; uint32_t max_engine_clk_ccompute; int32_t drm_render_minor; + uint32_t num_sdma_engines; + uint32_t num_sdma_xgmi_engines; uint16_t marketing_name[KFD_TOPOLOGY_PUBLIC_NAME_SIZE]; }; |