This is a simplified version of the fix by Roy in fdo#93629.  While this
doesn't appear to fix the issues for the users in that report, it's a
real issue that deserves to be resolved.
Reported-by: Roy Spliet <rspliet@eclipso.eu>
Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
14 files changed:
 // Main program loop, very simple, sleeps until woken up by the interrupt
 // handler, pulls a command from the queue and executes its handler
 //
-main:
-       bset $flags $p0
+wait:
        sleep $p0
+       bset $flags $p0
+main:
        mov $r13 #cmd_queue
        call(queue_get)
-       bra $p1 #main
+       bra $p1 #wait
 
        // 0x0000-0x0003 are all context transfers
        cmpu b32 $r14 0x04
 
        0xf11f29f0,
        0xf0080007,
        0x02d00203,
-/* 0x04bb: main */
+/* 0x04bb: wait */
        0xf404bd00,
-       0x28f40031,
+       0x31f40028,
+/* 0x04c1: main */
        0x1cd7f000,
        0xf43921f4,
        0xe4b0f401,
        0x0018fe05,
        0x05b421f5,
 /* 0x04eb: main_not_ctx_xfer */
-       0x94d30ef4,
+       0x94d90ef4,
        0xf5f010ef,
        0x7e21f501,
-       0xc60ef403,
+       0xcc0ef403,
 /* 0x04f8: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x080007f1,
        0xd00203f0,
        0x04bd0002,
-/* 0x0508: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0508: wait */
+       0xf40028f4,
+/* 0x050e: main */
+       0xd7f00031,
        0x3921f424,
        0xb0f401f4,
        0x18f404e4,
        0xfd01e4b6,
        0x18fe051e,
        0x0121f500,
-       0xd30ef406,
+       0xd90ef406,
 /* 0x0538: main_not_ctx_xfer */
        0xf010ef94,
        0x21f501f5,
        0x0ef4037e,
 /* 0x0545: ih */
-       0xf900f9c6,
+       0xf900f9cc,
        0x0188fe80,
        0x90f980f9,
        0xb0f9a0f9,
 
        0x080007f1,
        0xd00203f0,
        0x04bd0002,
-/* 0x0508: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0508: wait */
+       0xf40028f4,
+/* 0x050e: main */
+       0xd7f00031,
        0x3921f424,
        0xb0f401f4,
        0x18f404e4,
        0xfd01e4b6,
        0x18fe051e,
        0x0121f500,
-       0xd30ef406,
+       0xd90ef406,
 /* 0x0538: main_not_ctx_xfer */
        0xf010ef94,
        0x21f501f5,
        0x0ef4037e,
 /* 0x0545: ih */
-       0xf900f9c6,
+       0xf900f9cc,
        0x0188fe80,
        0x90f980f9,
        0xb0f9a0f9,
 
        0x300007f1,
        0xd00203f0,
        0x04bd0002,
-/* 0x0508: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0508: wait */
+       0xf40028f4,
+/* 0x050e: main */
+       0xd7f00031,
        0x3921f424,
        0xb0f401f4,
        0x18f404e4,
        0xfd01e4b6,
        0x18fe051e,
        0x0121f500,
-       0xd30ef406,
+       0xd90ef406,
 /* 0x0538: main_not_ctx_xfer */
        0xf010ef94,
        0x21f501f5,
        0x0ef4037e,
 /* 0x0545: ih */
-       0xf900f9c6,
+       0xf900f9cc,
        0x0188fe80,
        0x90f980f9,
        0xb0f9a0f9,
 
        0x801f29f0,
        0xf6023000,
        0x04bd0002,
-/* 0x0448: main */
-       0xf40031f4,
-       0x240d0028,
+/* 0x0448: wait */
+       0xf40028f4,
+/* 0x044e: main */
+       0x240d0031,
        0x0000377e,
        0xb0f401f4,
        0x18f404e4,
        0x0018fe05,
        0x00051f7e,
 /* 0x0477: main_not_ctx_xfer */
-       0x94d40ef4,
+       0x94da0ef4,
        0xf5f010ef,
        0x02f87e01,
-       0xc70ef400,
+       0xcd0ef400,
 /* 0x0484: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x1f29f024,
        0x02300080,
        0xbd0002f6,
-/* 0x0571: main */
-       0x0031f404,
-       0x0d0028f4,
+/* 0x0571: wait */
+       0x0028f404,
+/* 0x0577: main */
+       0x0d0031f4,
        0x00377e24,
        0xf401f400,
        0xf404e4b0,
        0xfd01e4b6,
        0x18fe051e,
        0x06487e00,
-       0xd40ef400,
+       0xda0ef400,
 /* 0x05a0: main_not_ctx_xfer */
        0xf010ef94,
        0xf87e01f5,
        0x0ef40002,
 /* 0x05ad: ih */
-       0xf900f9c7,
+       0xf900f9cd,
        0x0188fe80,
        0x90f980f9,
        0xb0f9a0f9,
 
 // Main program loop, very simple, sleeps until woken up by the interrupt
 // handler, pulls a command from the queue and executes its handler
 //
-main:
+wait:
        // sleep until we have something to do
-       bset $flags $p0
        sleep $p0
+       bset $flags $p0
+main:
        mov $r13 #cmd_queue
        call(queue_get)
-       bra $p1 #main
+       bra $p1 #wait
 
        // context switch, requested by GPU?
        cmpu b32 $r14 0x4001
 
        0x080007f1,
        0xd00203f0,
        0x04bd0001,
-/* 0x0564: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0564: wait */
+       0xf40028f4,
+/* 0x056a: main */
+       0xd7f00031,
        0x3921f410,
        0xb1f401f4,
        0xf54001e4,
        0x170007f1,
        0xd00203f0,
        0x04bd0009,
-       0xff080ef5,
+       0xff0e0ef5,
 /* 0x0660: main_not_ctx_switch */
        0xf401e4b0,
        0xf2b90d1b,
        0xf501f5f0,
        0xf5037e21,
 /* 0x06b3: main_done */
-       0xbdfeb50e,
+       0xbdfebb0e,
        0x1f29f024,
        0x080007f1,
        0xd00203f0,
        0x04bd0002,
-       0xfea00ef5,
+       0xfea60ef5,
 /* 0x06c8: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x080007f1,
        0xd00203f0,
        0x04bd0001,
-/* 0x0564: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0564: wait */
+       0xf40028f4,
+/* 0x056a: main */
+       0xd7f00031,
        0x3921f410,
        0xb1f401f4,
        0xf54001e4,
        0x170007f1,
        0xd00203f0,
        0x04bd0009,
-       0xff080ef5,
+       0xff0e0ef5,
 /* 0x0660: main_not_ctx_switch */
        0xf401e4b0,
        0xf2b90d1b,
        0xf501f5f0,
        0xf5037e21,
 /* 0x06b3: main_done */
-       0xbdfeb50e,
+       0xbdfebb0e,
        0x1f29f024,
        0x080007f1,
        0xd00203f0,
        0x04bd0002,
-       0xfea00ef5,
+       0xfea60ef5,
 /* 0x06c8: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x080007f1,
        0xd00203f0,
        0x04bd0001,
-/* 0x0564: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0564: wait */
+       0xf40028f4,
+/* 0x056a: main */
+       0xd7f00031,
        0x3921f410,
        0xb1f401f4,
        0xf54001e4,
        0x170007f1,
        0xd00203f0,
        0x04bd0009,
-       0xff080ef5,
+       0xff0e0ef5,
 /* 0x0660: main_not_ctx_switch */
        0xf401e4b0,
        0xf2b90d1b,
        0xf501f5f0,
        0xf5037e21,
 /* 0x06b3: main_done */
-       0xbdfeb50e,
+       0xbdfebb0e,
        0x1f29f024,
        0x080007f1,
        0xd00203f0,
        0x04bd0002,
-       0xfea00ef5,
+       0xfea60ef5,
 /* 0x06c8: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x300007f1,
        0xd00203f0,
        0x04bd0001,
-/* 0x0564: main */
-       0xf40031f4,
-       0xd7f00028,
+/* 0x0564: wait */
+       0xf40028f4,
+/* 0x056a: main */
+       0xd7f00031,
        0x3921f410,
        0xb1f401f4,
        0xf54001e4,
        0x170007f1,
        0xd00203f0,
        0x04bd0009,
-       0xff080ef5,
+       0xff0e0ef5,
 /* 0x0660: main_not_ctx_switch */
        0xf401e4b0,
        0xf2b90d1b,
        0xf501f5f0,
        0xf5037e21,
 /* 0x06b3: main_done */
-       0xbdfeb50e,
+       0xbdfebb0e,
        0x1f29f024,
        0x300007f1,
        0xd00203f0,
        0x04bd0002,
-       0xfea00ef5,
+       0xfea60ef5,
 /* 0x06c8: ih */
        0x80f900f9,
        0xf90188fe,
 
        0x1f19f014,
        0x02300080,
        0xbd0001f6,
-/* 0x0491: main */
-       0x0031f404,
-       0x0d0028f4,
+/* 0x0491: wait */
+       0x0028f404,
+/* 0x0497: main */
+       0x0d0031f4,
        0x00377e10,
        0xf401f400,
        0x4001e4b1,
        0x09f60217,
        0xf504bd00,
 /* 0x056b: main_not_ctx_switch */
-       0xb0ff2a0e,
+       0xb0ff300e,
        0x1bf401e4,
        0x7ef2b20c,
        0xf4000820,
        0x7e01f5f0,
        0xf50002f8,
 /* 0x05b7: main_done */
-       0xbdfede0e,
+       0xbdfee40e,
        0x1f29f024,
        0x02300080,
        0xbd0002f6,
-       0xcc0ef504,
+       0xd20ef504,
 /* 0x05c9: ih */
        0xf900f9fe,
        0x0188fe80,
 
        0x1f19f014,
        0x02300080,
        0xbd0001f6,
-/* 0x0491: main */
-       0x0031f404,
-       0x0d0028f4,
+/* 0x0491: wait */
+       0x0028f404,
+/* 0x0497: main */
+       0x0d0031f4,
        0x00377e10,
        0xf401f400,
        0x4001e4b1,
        0x09f60217,
        0xf504bd00,
 /* 0x056b: main_not_ctx_switch */
-       0xb0ff2a0e,
+       0xb0ff300e,
        0x1bf401e4,
        0x7ef2b20c,
        0xf4000820,
        0x7e01f5f0,
        0xf50002f8,
 /* 0x05b7: main_done */
-       0xbdfede0e,
+       0xbdfee40e,
        0x1f29f024,
        0x02300080,
        0xbd0002f6,
-       0xcc0ef504,
+       0xd20ef504,
 /* 0x05c9: ih */
        0xf900f9fe,
        0x0188fe80,