I know that, but... how on earth will the compiler know the value after 60 seconds?
It doesn't know the value after 60 seconds, but it knows the value at the end of the loop, which is written in the code. For years, optimizing compilers have been able to recognize a number of loop idioms, especially such simple ones as
- Code: Tout sélectionner
do {
counter++;
} while (counter < 349700000);
Such code is turned into
- Code: Tout sélectionner
counter = 349700000;
by optimizing compilers; then, Dead Store Elimination will erase this assignment and the counter variable, since it's not used later.
Unless the compiler used for the fx-9860g absolutely stinks, or the benchmark is compiled without optimization, the program should print "end" immediately.
Anyway, yes, it looks suspicious, but there is a simple solution: whatever looks suspicious to a reader simply isn't counted by that reader.
If the #1 spot in the benchmark is a fluke (which remains to be confirmed), it would reduce the benchmark's credibility.
Now... unleash your Texas Instruments!
I wrote I would, so here are a couple of TI-68k C/ASM programs, written yesterday evening and this morning.
NOTE: building them requires GCC4TI, they won't compile with the older, unmaintained and
much harder to install TIGCC:
1) File addloop_register_polling.c:
- Code: Tout sélectionner
// addloop_register_polling.c: optimize counting to the maximum, through keeping the value in a register and writing the main loop in ASM, so as to avoid compiler pessimizations.
#define MIN_AMS 101
#define USE_TI89
#define USE_TI92P
#define USE_V200
#define USE_TI89T
#define NO_CALC_DETECT
#define OPTIMIZE_ROM_CALLS
#define RETURN_VALUE
#include <stdint.h>
#include <system.h>
#include <args.h>
#include <estack.h>
#include <peekpoke.h>
#include <intr.h>
#define TIMER_START_VAL (100000UL)
/*
 * Entry point of the register-polling benchmark.
 *
 * Increments a 32-bit counter in a tight assembly loop for as long as bit 1 of
 * the byte at 0x60001A stays set, i.e. until the ON key is pressed, and
 * measures the elapsed time in USER_TIMER ticks.
 *
 * The counter value and the elapsed tick count are pushed onto the expression
 * stack and wrapped into a list, which is the program's return value
 * (RETURN_VALUE is defined for this file).
 */
void _main(void) {
    // Loop counter. It MUST be zero when the asm loop below starts: the loop
    // increments it in place, in a data register.
    uint32_t i = 0;
    short orig_rate = PRG_getRate();
    unsigned short orig_start = PRG_getStart();
    unsigned char * ON_key_status = (unsigned char *)0x60001A;
    unsigned long val = 0;

    // Make the system timer an order of magnitude more precise;
    // NOTE: this code assumes a HW2+ TI-68k, i.e. anything since 1999.
    PRG_setRate(1); // Increment counter at a rate of 2^19/2^9 Hz
    PRG_setStart(0xCE); // Trigger the interrupt every 257 - 0xCE = 51 increments ~ 20.07 Hz.

    // The PRG_getStart() above effectively waited for the interrupt to trigger,
    // so no additional one-tick synchronization wait is performed here.
    OSRegisterTimer(USER_TIMER, TIMER_START_VAL);

    // Main loop :)
    // The assembly snippet is the equivalent of
    /*
    do {
        i++;
    } while (*(volatile unsigned char *)ON_key_status & 2);
    */
    // but it lets no compiler pessimization, such as constant-propagating the
    // ON_key_status variable away (sigh), occur.
    //
    // Operand notes:
    //  * "+d"(i): i is both read and written by addq.l, so it has to be a
    //    read-write operand. With a write-only "=d" operand the compiler is
    //    allowed to discard the "i = 0" initialization and start counting
    //    from whatever the chosen register happens to contain.
    //  * "cc": addq.l and btst.b both modify the condition codes.
    asm volatile("lloop:\n"
                 "    addq.l #1, %0\n"
                 "    btst.b #1, (%1)\n"
                 "    bne.s lloop\n"
                 : "+d"(i)
                 : "a"(ON_key_status)
                 : "cc");

    // Retrieve timer value: elapsed ticks = initial value - current value.
    val = TIMER_START_VAL - OSTimerCurVal(USER_TIMER);
    OSFreeTimer(USER_TIMER);

    // Give some time for the ON key to come back up.
    OSRegisterTimer(USER_TIMER, 4);
    while (!OSTimerExpired(USER_TIMER));
    OSFreeTimer(USER_TIMER);
    OSClearBreak();

    // Push arguments onto the RPN stack: clean arguments up, then create a list.
    while (GetArgType (top_estack) != END_TAG) {
        top_estack = next_expression_index (top_estack);
    }
    top_estack--;
    push_END_TAG();
    push_longint(i);
    push_longint(val);
    push_LIST_TAG();

    // Restore old system state.
    PRG_setRate(orig_rate);
    PRG_setStart(orig_start);
}
2) File addloop_memory_polling.c:
- Code: Tout sélectionner
// addloop_memory_polling.c: don't optimize counting that much, through "volatile" which triggers three instructions instead of just one for dealing with memory and an address which gets constant-propagated instead of being kept in a register.
#define MIN_AMS 101
#define USE_TI89
#define USE_TI92P
#define USE_V200
#define USE_TI89T
#define NO_CALC_DETECT
#define OPTIMIZE_ROM_CALLS
#define RETURN_VALUE
#include <stdint.h>
#include <system.h>
#include <args.h>
#include <estack.h>
#include <peekpoke.h>
#include <intr.h>
#define TIMER_START_VAL (100000UL)
/*
 * Entry point of the memory-polling benchmark.
 *
 * Same measurement as the register-polling variant, but written in plain C:
 * the counter and the polled address are both volatile-qualified, so every
 * iteration reads and writes the counter in memory. The loop runs until bit 1
 * of the byte at 0x60001A is cleared (ON key pressed); the counter and the
 * elapsed USER_TIMER ticks are then pushed onto the expression stack and
 * wrapped into a list.
 */
void _main(void) {
    volatile uint32_t counter = 0;
    const short saved_rate = PRG_getRate();
    const unsigned short saved_start = PRG_getStart();
    volatile unsigned char *on_key_port = (volatile unsigned char *)0x60001A;

    // Raise the programmable timer's precision (assumes a HW2+ TI-68k):
    // rate 1 -> 2^19/2^9 Hz, start 0xCE -> one interrupt every
    // 257 - 0xCE = 51 increments, i.e. ~20.07 Hz.
    PRG_setRate(1);
    PRG_setStart(0xCE);

    // PRG_getStart() above already waited for the interrupt to trigger, so
    // the measurement timer is armed right away.
    OSRegisterTimer(USER_TIMER, TIMER_START_VAL);

    // Measured loop. Deliberately left as straightforward C so that the costs
    // that come with "volatile" (counter read from and written back to memory,
    // polled address constant-propagated) are part of the measurement.
    do {
        counter++;
    } while (*on_key_port & 2);

    // Elapsed ticks = initial timer value - current timer value.
    const unsigned long elapsed_ticks = TIMER_START_VAL - OSTimerCurVal(USER_TIMER);
    OSFreeTimer(USER_TIMER);

    // Short pause so that the ON key has time to be released, then clear the
    // pending break.
    OSRegisterTimer(USER_TIMER, 4);
    while (!OSTimerExpired(USER_TIMER)) {
        /* busy-wait */
    }
    OSFreeTimer(USER_TIMER);
    OSClearBreak();

    // Discard the program's arguments from the expression stack...
    for (; GetArgType(top_estack) != END_TAG; top_estack = next_expression_index(top_estack)) {
        /* skip one argument per iteration */
    }
    --top_estack;

    // ...and build the result list in their place.
    push_END_TAG();
    push_longint(counter);
    push_longint(elapsed_ticks);
    push_LIST_TAG();

    // Put the programmable timer back the way it was found.
    PRG_setRate(saved_rate);
    PRG_setStart(saved_start);
}
3) Build script - all flags but -O3 reduce size but have no effect on code generation for the main loop:
- Code: Tout sélectionner
# Build both benchmark variants with identical flags; only the input source file and the output name differ.
tigcc -v -O3 -Wall -W -mpcrel --optimize-code --cut-ranges --reorder-sections --remove-unused --merge-constants -fmerge-all-constants -Wa,--all-relocs -Wa,-l -fverbose-asm -save-temps -o addloop1 addloop_register_polling.c
tigcc -v -O3 -Wall -W -mpcrel --optimize-code --cut-ranges --reorder-sections --remove-unused --merge-constants -fmerge-all-constants -Wa,--all-relocs -Wa,-l -fverbose-asm -save-temps -o addloop2 addloop_memory_polling.c
4) Results on 89T HW4 running AMS 3.10 patched with my tiosmod+amspatch, the first element of each list being the number of timer ticks at (2^19/2^9)/51 ~ 20.07 Hz and the second element being the value of the counter when ON is pressed:
* addloop1 (addloop_register_polling): {1203, 24700949} {1237, 25423732} {1211, 24846885} (very coherent with each other)
* addloop2 (addloop_memory_polling): {1206, 9769092} {1214, 9827570} (again, coherent with each other)
Comments:
* the main loop is a tiny code snippet buried among accuracy-increasing measures and the code dealing with the consequences of pressing the ON key;
* the main loop in addloop1 is a 1:1 copy of that of the HP-50g benchmark, and shows the 89T is between 6x and 7x slower than the HP-50g, which is easily explained, as I posted on Cemetech;
* the main loop in addloop2 is closer to interpreted languages, since at least the variable is read from and written to memory, and it shows a ~2.5x slowdown relative to addloop1.