net-libs/xulrunner/files/1009-armv4t-nanojit-v2.patch


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320

From: Mike Hommey <glandium@debian.org>
Date: Fri, 30 Apr 2010 14:32:41 +0200
Subject: Add nanojit support for ARMv4T

Thanks Albin Tonnerre for the initial patch.
https://bugzilla.mozilla.org/show_bug.cgi?id=552624
---
 js/src/nanojit/NativeARM.cpp |  105 ++++++++++++++++++++++------------------
 js/src/nanojit/avmplus.h     |    2 +
 js/src/nanojit/njcpudetect.h |  111 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+), 48 deletions(-)
 create mode 100644 js/src/nanojit/njcpudetect.h

diff --git a/js/src/nanojit/NativeARM.cpp b/js/src/nanojit/NativeARM.cpp
index 9387191..a50898c 100644
--- a/js/src/nanojit/NativeARM.cpp
+++ b/js/src/nanojit/NativeARM.cpp
@@ -61,6 +61,8 @@ extern "C" void __clear_cache(void *BEG, void *END);
 
 #ifdef FEATURE_NANOJIT
 
+#define ARM_ARCH_AT_LEAST(wanted) ((NJ_COMPILER_ARM_ARCH >= wanted) || (ARM_ARCH >= wanted))
+
 namespace nanojit
 {
 
@@ -114,49 +116,50 @@ Assembler::CountLeadingZeroes(uint32_t data)
 {
     uint32_t    leading_zeroes;
 
-    // We can't do CLZ on anything earlier than ARMv5. Architectures as early
-    // as that aren't supported, but assert that we aren't running on one
-    // anyway.
-    // If ARMv4 support is required in the future for some reason, we can do a
-    // run-time check on config.arch and fall back to the C routine, but for
-    // now we can avoid the cost of the check as we don't intend to support
-    // ARMv4 anyway.
-    NanoAssert(ARM_ARCH >= 5);
-
 #if defined(__ARMCC__)
     // ARMCC can do this with an intrinsic.
     leading_zeroes = __clz(data);
 
-// current Android GCC compiler incorrectly refuses to compile 'clz' for armv5
-// (even though this is a legal instruction there). Since we currently only compile for ARMv5
-// for emulation, we don't care too much (but we DO care for ARMv6+ since those are "real"
-// devices).
-#elif defined(__GNUC__) && !(defined(ANDROID) && __ARM_ARCH__ <= 5) 
+    if (0) // We don't need the fallback
+#elif defined(__GNUC__)
     // GCC can use inline assembler to insert a CLZ instruction.
-    __asm (
-        "   clz     %0, %1  \n"
-        :   "=r"    (leading_zeroes)
-        :   "r"     (data)
-    );
+    // Targetting armv5t allows a toolchain with armv4t default target to
+    // still build with clz. On Android gcc compiler, clz is not supported
+    // with a target smaller than armv7.
+    if (ARM_ARCH_AT_LEAST(5))
+        __asm (
+#if defined(ANDROID) && NJ_COMPILER_ARM_ARCH <= 5
+            ".arch armv7\n"
+#elif (NJ_COMPILER_ARM_ARCH < 5)
+            ".arch armv5t\n"
+#endif
+            "   clz     %0, %1  \n"
+            :   "=r"    (leading_zeroes)
+            :   "r"     (data)
+        );
+    else
 #elif defined(WINCE)
     // WinCE can do this with an intrinsic.
     leading_zeroes = _CountLeadingZeros(data);
-#else
-    // Other platforms must fall back to a C routine. This won't be as
-    // efficient as the CLZ instruction, but it is functional.
-    uint32_t    try_shift;
-
-    leading_zeroes = 0;
-
-    // This loop does a bisection search rather than the obvious rotation loop.
-    // This should be faster, though it will still be no match for CLZ.
-    for (try_shift = 16; try_shift != 0; try_shift /= 2) {
-        uint32_t    shift = leading_zeroes + try_shift;
-        if (((data << shift) >> shift) == data) {
-            leading_zeroes = shift;
+
+    if (0) // We don't need the fallback
+#endif
+    {
+        // Other platforms must fall back to a C routine. This won't be as
+        // efficient as the CLZ instruction, but it is functional.
+        uint32_t    try_shift;
+
+        leading_zeroes = 0;
+
+        // This loop does a bisection search rather than the obvious rotation loop.
+        // This should be faster, though it will still be no match for CLZ.
+        for (try_shift = 16; try_shift != 0; try_shift /= 2) {
+            uint32_t    shift = leading_zeroes + try_shift;
+            if (((data << shift) >> shift) == data) {
+                leading_zeroes = shift;
+            }
         }
     }
-#endif
 
     // Assert that the operation worked!
     NanoAssert(((0xffffffff >> leading_zeroes) & data) == data);
@@ -555,13 +558,18 @@ NIns*
 Assembler::genEpilogue()
 {
     // On ARMv5+, loading directly to PC correctly handles interworking.
-    // Note that we don't support anything older than ARMv5.
-    NanoAssert(ARM_ARCH >= 5);
-
-    RegisterMask savingMask = rmask(FP) | rmask(PC);
+    // On ARMv4T, interworking is not handled properly, therefore, we pop
+    // lr into ip and use bx ip to avoid that.
+    if (ARM_ARCH_AT_LEAST(5)) {
+        RegisterMask savingMask = rmask(FP) | rmask(PC);
 
-    POP_mask(savingMask); // regs
+        POP_mask(savingMask); // regs
+    } else {
+        RegisterMask savingMask = rmask(FP) | rmask(IP);
 
+        BX(IP);
+        POP_mask(savingMask); // regs
+    }
     return _nIns;
 }
 
@@ -1502,7 +1510,7 @@ Assembler::BranchWithLink(NIns* addr)
 
     // ARMv5 and above can use BLX <imm> for branches within ±32MB of the
     // PC and BLX Rm for long branches.
-    if (isS24(offs>>2)) {
+    if (isS24(offs>>2) && (ARM_ARCH_AT_LEAST(5))) {
         // the value we need to stick in the instruction; masked,
         // because it will be sign-extended back to 32 bits.
         intptr_t offs2 = (offs>>2) & 0xffffff;
@@ -1519,7 +1527,6 @@ Assembler::BranchWithLink(NIns* addr)
             // We need to emit an ARMv5+ instruction, so assert that we have a
             // suitable processor. Note that we don't support ARMv4(T), but
             // this serves as a useful sanity check.
-            NanoAssert(ARM_ARCH >= 5);
 
             // The (pre-shifted) value of the "H" bit in the BLX encoding.
             uint32_t    H = (offs & 0x2) << 23;
@@ -1543,11 +1550,6 @@ Assembler::BranchWithLink(NIns* addr)
 inline void
 Assembler::BLX(Register addr, bool chk /* = true */)
 {
-    // We need to emit an ARMv5+ instruction, so assert that we have a suitable
-    // processor. Note that we don't support ARMv4(T), but this serves as a
-    // useful sanity check.
-    NanoAssert(ARM_ARCH >= 5);
-
     NanoAssert(IsGpReg(addr));
     // There is a bug in the WinCE device emulator which stops "BLX LR" from
     // working as expected. Assert that we never do that!
@@ -1558,8 +1560,15 @@ Assembler::BLX(Register addr, bool chk /* = true */)
     }
 
     // BLX IP
-    *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
-    asm_output("blx ip");
+    if (ARM_ARCH_AT_LEAST(5)) {
+        *(--_nIns) = (NIns)( (COND_AL) | (0x12<<20) | (0xFFF<<8) | (0x3<<4) | (addr) );
+        asm_output("blx %s", gpn(addr));
+    } else {
+        *(--_nIns) = (NIns)( (COND_AL) | (0x12fff1 << 4) | (addr) );
+        asm_output("bx %s", gpn(addr));
+        *(--_nIns) = (NIns)( (COND_AL) | (0x1A0 << 16) | (0xE << 12) | 0xF );
+        asm_output("mov lr, pc");
+    }
 }
 
 // Emit the code required to load a memory address into a register as follows:
@@ -2177,7 +2186,7 @@ Assembler::asm_arith(LInsp ins)
             // common for (rr == ra) and is thus likely to be the most
             // efficient case; if ra is no longer used after this LIR
             // instruction, it is re-used for the result register (rr).
-            if ((ARM_ARCH > 5) || (rr != rb)) {
+            if ((ARM_ARCH_AT_LEAST(6)) || (rr != rb)) {
                 // Newer cores place no restrictions on the registers used in a
                 // MUL instruction (compared to other arithmetic instructions).
                 MUL(rr, rb, ra);
diff --git a/js/src/nanojit/avmplus.h b/js/src/nanojit/avmplus.h
index ffc0873..e86f22e 100644
--- a/js/src/nanojit/avmplus.h
+++ b/js/src/nanojit/avmplus.h
@@ -50,6 +50,8 @@
 #include "jstypes.h"
 #include "jsstdint.h"
 
+#include "njcpudetect.h"
+
 #ifdef AVMPLUS_ARM
 #define ARM_ARCH   config.arch
 #define ARM_VFP    config.vfp
diff --git a/js/src/nanojit/njcpudetect.h b/js/src/nanojit/njcpudetect.h
new file mode 100644
index 0000000..79ea90b
--- /dev/null
+++ b/js/src/nanojit/njcpudetect.h
@@ -0,0 +1,111 @@
+/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
+/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
+/* ***** BEGIN LICENSE BLOCK *****
+ * Version: MPL 1.1/GPL 2.0/LGPL 2.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * The Original Code is [Open Source Virtual Machine].
+ *
+ * The Initial Developer of the Original Code is
+ * Adobe System Incorporated.
+ * Portions created by the Initial Developer are Copyright (C) 2004-2007
+ * the Initial Developer. All Rights Reserved.
+ *
+ * Contributor(s):
+ *   Adobe AS3 Team
+ *
+ * Alternatively, the contents of this file may be used under the terms of
+ * either the GNU General Public License Version 2 or later (the "GPL"), or
+ * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+ * in which case the provisions of the GPL or the LGPL are applicable instead
+ * of those above. If you wish to allow use of your version of this file only
+ * under the terms of either the GPL or the LGPL, and not to allow others to
+ * use your version of this file under the terms of the MPL, indicate your
+ * decision by deleting the provisions above and replace them with the notice
+ * and other provisions required by the GPL or the LGPL. If you do not delete
+ * the provisions above, a recipient may use your version of this file under
+ * the terms of any one of the MPL, the GPL or the LGPL.
+ *
+ * ***** END LICENSE BLOCK ***** */
+
+#ifndef __njcpudetect__
+#define __njcpudetect__
+
+/***
+ * Note: this file should not include *any* other files, nor should it wrap
+ * itself in ifdef FEATURE_NANOJIT, nor should it do anything other than
+ * define preprocessor symbols.
+ */
+
+/***
+ * NJ_COMPILER_ARM_ARCH attempts to specify the minimum ARM architecture
+ * that the C++ compiler has specified. Note that although Config::arm_arch
+ * is initialized to this value by default, there is no requirement that they
+ * be in sync.
+ *
+ * Note, this is done via #define so that downstream preprocessor usage can
+ * examine it, but please don't attempt to redefine it.
+ *
+ * Note, this is deliberately not encased in "ifdef NANOJIT_ARM", as this file
+ * may be included before that is defined. On non-ARM platforms we will hit the
+ * "Unable to determine" case.
+ */
+
+// GCC and RealView usually define __ARM_ARCH__
+#if defined(__ARM_ARCH__)
+
+    #define NJ_COMPILER_ARM_ARCH __ARM_ARCH__
+
+// ok, try well-known GCC flags ( see http://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html )
+#elif     defined(__ARM_ARCH_7__) || \
+        defined(__ARM_ARCH_7A__) || \
+        defined(__ARM_ARCH_7M__) || \
+        defined(__ARM_ARCH_7R__) || \
+        defined(_ARM_ARCH_7)
+
+    #define NJ_COMPILER_ARM_ARCH 7
+
+#elif   defined(__ARM_ARCH_6__) || \
+        defined(__ARM_ARCH_6J__) || \
+        defined(__ARM_ARCH_6T2__) || \
+        defined(__ARM_ARCH_6Z__) || \
+        defined(__ARM_ARCH_6ZK__) || \
+        defined(__ARM_ARCH_6M__) || \
+        defined(_ARM_ARCH_6)
+
+    #define NJ_COMPILER_ARM_ARCH 6
+
+#elif   defined(__ARM_ARCH_5__) || \
+        defined(__ARM_ARCH_5T__) || \
+        defined(__ARM_ARCH_5E__) || \
+        defined(__ARM_ARCH_5TE__)
+
+    #define NJ_COMPILER_ARM_ARCH 5
+
+#elif   defined(__ARM_ARCH_4__) || \
+        defined(__ARM_ARCH_4T__)
+
+    #define NJ_COMPILER_ARM_ARCH 4
+
+// Visual C has its own mojo
+#elif defined(_MSC_VER) && defined(_M_ARM)
+
+    #define NJ_COMPILER_ARM_ARCH _M_ARM
+
+#else
+
+    // non-numeric value
+    #define NJ_COMPILER_ARM_ARCH "Unable to determine valid NJ_COMPILER_ARM_ARCH (nanojit only supports ARMv5 or later)"
+
+#endif
+
+#endif // __njcpudetect__