src/sys/arch/i386/i386/lock_stubs.S, revision 1.2.8.1
/*	$NetBSD: lock_stubs.S,v 1.2 2007/02/09 21:55:04 ad Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Note on the 80386: the 80386 doesn't have a compare-and-exchange
 * operation.  Stepping A of the i486 has these instructions wired to a
 * different opcode, so it should use these stubs as well.  Such chips
 * are rare, so we don't make the effort.
 *
 * The sizes listed against each function are for a kernel compiled
 * with options MULTIPROCESSOR && DIAGNOSTIC && !I386_CPU.  The offsets
 * are for a kernel compiled without the I386_CPU option.  Where possible
 * we make each routine fit into an assumed 64-byte cache line.
 */

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"
#include "opt_cputype.h"
#include "opt_ddb.h"

#include <machine/asm.h>
#include <machine/cputypes.h>

#include "assym.h"

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

#if defined(I386_CPU)
#define	STUB(name, alternate)					\
	NENTRY(name) ;						\
	cmpl	$CPUCLASS_386, _C_LABEL(cpu_class) ;		\
	movl	4(%esp), %edx ;					\
	je	_C_LABEL(alternate)
#define	ALIGN64		.align	16	/* don't bother */
#define	ALIGN32		.align	16	/* don't bother */
#else
#define	STUB(name, alternate)					\
	NENTRY(name) ;						\
	movl	4(%esp), %edx
#define	ALIGN64		.align	64
#define	ALIGN32		.align	32
#endif

#if defined(MULTIPROCESSOR)
#define	LOCK		lock
#else
#define	LOCK		/* nothing */
#endif

#define	END(name,a)	.align	a; LABEL(name)

#if !defined(LOCKDEBUG)

/*
 * void mutex_enter(kmutex_t *mtx);
 *
 * Acquire a mutex and post a load fence.
 */
	ALIGN64

STUB(mutex_enter, mutex_vector_enter)		/* 0x0000, 20 bytes */
	movl	CPUVAR(CURLWP), %ecx
	xorl	%eax, %eax
	LOCK
	cmpxchgl %ecx, MTX_OWNER(%edx)
	jnz,pn	_C_LABEL(mutex_vector_enter)
	ret

/*
 * void mutex_exit(kmutex_t *mtx);
 *
 * Release a mutex and post a load fence.
 *
 * See comments in mutex_vector_enter() about doing this operation unlocked
 * on multiprocessor systems, and comments in arch/x86/include/lock.h about
 * memory ordering on Intel x86 systems.
 */
	ALIGN32

STUB(mutex_exit, mutex_vector_exit)		/* 0x0020, 19 bytes */
	movl	CPUVAR(CURLWP), %eax
	xorl	%ecx, %ecx
	cmpxchgl %ecx, MTX_OWNER(%edx)
	jnz,pn	_C_LABEL(mutex_vector_exit)
	ret

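/*
 * For illustration only: a hedged C model of the two fast paths above,
 * assuming a simplified kmutex_t with a single owner word and using a GCC
 * __sync builtin in place of the hand-written cmpxchg.  The mutex_model_*
 * names and the struct layout are hypothetical, not the kernel's real
 * definitions, and the real exit stub deliberately omits the LOCK prefix,
 * so this is a semantic model only.
 *
 *	#include <stdint.h>
 *
 *	struct lwp;
 *	extern struct lwp *curlwp;		// this CPU's current LWP
 *	struct kmutex_model { volatile uintptr_t mtx_owner; };
 *	void mutex_vector_enter(struct kmutex_model *);
 *	void mutex_vector_exit(struct kmutex_model *);
 *
 *	void
 *	mutex_model_enter(struct kmutex_model *mtx)
 *	{
 *		// Fast path: owner 0 -> curlwp.  Anything else (owned,
 *		// contended, not an adaptive mutex) takes the slow path.
 *		if (!__sync_bool_compare_and_swap(&mtx->mtx_owner,
 *		    (uintptr_t)0, (uintptr_t)curlwp))
 *			mutex_vector_enter(mtx);
 *	}
 *
 *	void
 *	mutex_model_exit(struct kmutex_model *mtx)
 *	{
 *		// Fast path: owner curlwp -> 0.  Fails if waiters are
 *		// recorded in the owner word; then the slow path wakes them.
 *		if (!__sync_bool_compare_and_swap(&mtx->mtx_owner,
 *		    (uintptr_t)curlwp, (uintptr_t)0))
 *			mutex_vector_exit(mtx);
 *	}
 */
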
/*
 * void rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
	ALIGN64

STUB(rw_enter, rw_vector_enter)			/* 0x0040, 60 bytes */
	cmpl	$RW_READER, 8(%esp)
	jne	2f

	/*
	 * Reader
	 */
1:	movl	RW_OWNER(%edx), %eax
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	leal	RW_READ_INCR(%eax), %ecx
	jnz,pn	_C_LABEL(rw_vector_enter)
	LOCK
	cmpxchgl %ecx, RW_OWNER(%edx)
	jnz,pn	1b
	ret

	/*
	 * Writer
	 */
2:	movl	CPUVAR(CURLWP), %ecx
	xorl	%eax, %eax
	orl	$RW_WRITE_LOCKED, %ecx
	LOCK
	cmpxchgl %ecx, RW_OWNER(%edx)
	jnz,pn	_C_LABEL(rw_vector_enter)
	ret

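/*
 * For illustration only: a hedged C model of the rw_enter() fast paths
 * above, assuming a simplified krwlock_t whose single owner word holds
 * either a reader count scaled by RW_READ_INCR or an owner LWP pointer
 * with RW_WRITE_LOCKED set in its low bits.  The constants, layout and
 * rw_model_* names are stand-ins, not the kernel's real definitions.
 *
 *	#include <stdint.h>
 *
 *	#define	RW_WRITE_LOCKED	0x01UL
 *	#define	RW_WRITE_WANTED	0x02UL
 *	#define	RW_READ_INCR	0x20UL
 *
 *	struct krw_model { volatile uintptr_t rw_owner; };
 *	struct lwp; extern struct lwp *curlwp;
 *	void rw_vector_enter(struct krw_model *, int);
 *
 *	void
 *	rw_model_enter(struct krw_model *rwl, int op)	// 0 = reader
 *	{
 *		uintptr_t owner;
 *
 *		if (op != 0) {
 *			// Writer: only a completely unowned lock can be
 *			// taken on the fast path.
 *			if (!__sync_bool_compare_and_swap(&rwl->rw_owner, 0,
 *			    (uintptr_t)curlwp | RW_WRITE_LOCKED))
 *				rw_vector_enter(rwl, op);
 *			return;
 *		}
 *		// Reader: retry until the increment lands, or bail to the
 *		// slow path if a writer holds or wants the lock.
 *		do {
 *			owner = rwl->rw_owner;
 *			if (owner & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) {
 *				rw_vector_enter(rwl, op);
 *				return;
 *			}
 *		} while (!__sync_bool_compare_and_swap(&rwl->rw_owner,
 *		    owner, owner + RW_READ_INCR));
 *	}
 */
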
/*
 * void rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
	ALIGN64

STUB(rw_exit, rw_vector_exit)			/* 0x0080, 61 bytes */
	movl	RW_OWNER(%edx), %eax
	testb	$RW_WRITE_LOCKED, %al
	jnz	2f

	/*
	 * Reader
	 */
1:	testb	$RW_HAS_WAITERS, %al
	jnz,pn	3f
	cmpl	$RW_READ_INCR, %eax
	leal	-RW_READ_INCR(%eax), %ecx
	jb,pn	3f
	LOCK
	cmpxchgl %ecx, RW_OWNER(%edx)
	jnz,pn	1b
	ret

	/*
	 * Writer
	 */
2:	leal	-RW_WRITE_LOCKED(%eax), %ecx
	subl	CPUVAR(CURLWP), %ecx
	jnz,pn	3f
	LOCK
	cmpxchgl %ecx, RW_OWNER(%edx)
	jnz,pn	3f
	ret

	/*
	 * Slow path.
	 */
3:	jmp	_C_LABEL(rw_vector_exit)

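/*
 * For illustration only: a hedged C model of the reader-release fast path
 * above, reusing the stand-in krw_model definitions sketched after
 * rw_enter().  RW_HAS_WAITERS is likewise a stand-in constant.
 *
 *	#define	RW_HAS_WAITERS	0x04UL
 *
 *	void rw_vector_exit(struct krw_model *);
 *
 *	void
 *	rw_model_exit_reader(struct krw_model *rwl)
 *	{
 *		uintptr_t owner;
 *
 *		do {
 *			owner = rwl->rw_owner;
 *			// Waiters present, or the count would underflow:
 *			// let the slow path sort it out.
 *			if ((owner & RW_HAS_WAITERS) != 0 ||
 *			    owner < RW_READ_INCR) {
 *				rw_vector_exit(rwl);
 *				return;
 *			}
 *		} while (!__sync_bool_compare_and_swap(&rwl->rw_owner,
 *		    owner, owner - RW_READ_INCR));
 *	}
 */
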
#ifndef __XEN__

/*
 * void mutex_spin_enter(kmutex_t *mtx);
 *
 * Acquire a spin mutex and post a load fence.
 */
	ALIGN64

STUB(mutex_spin_enter, mutex_vector_enter)	/* 0x00c0, 51 bytes */
	movl	CPUVAR(SELF150), %eax
	movl	(CPU_INFO_ILEVEL-0x150)(%eax), %ecx
	subl	$1, (CPU_INFO_MTX_COUNT-0x150)(%eax)	/* decl does not set CF */
	jnc	1f
	movl	%ecx, (CPU_INFO_MTX_OLDSPL-0x150)(%eax)
1:	movb	MTX_IPL(%edx), %ch
	cmpb	%ch, %cl
	jg,pn	2f
	movb	%ch, (CPU_INFO_ILEVEL-0x150)(%eax)	/* splraiseipl() */
2:
#if defined(FULL)
	mov	$0x0100, %eax			/* new + expected value */
	LOCK
	cmpxchgb %ah, MTX_LOCK(%edx)		/* lock it */
	jnz,pn	_C_LABEL(mutex_spin_retry)
#endif
	ret

	ALIGN64
LABEL(mutex_spin_enter_end)

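/*
 * For illustration only: a hedged C model of mutex_spin_enter() above.
 * The cpu_model fields and names are simplified stand-ins for the real
 * cpu_info bookkeeping; mutex_spin_retry() is the slow path taken when
 * the lock byte is already held.
 *
 *	struct kmutex_spin_model {
 *		volatile unsigned char	mtx_lock;	// 0 free, 1 held
 *		unsigned char		mtx_ipl;
 *	};
 *	struct cpu_model {
 *		int	ci_ilevel;	// current interrupt priority level
 *		int	ci_mtx_count;	// goes negative while spin mutexes held
 *		int	ci_mtx_oldspl;	// SPL to restore at outermost release
 *	};
 *	extern struct cpu_model *curcpu_model;
 *	void mutex_spin_retry(struct kmutex_spin_model *);
 *
 *	void
 *	mutex_spin_model_enter(struct kmutex_spin_model *mtx)
 *	{
 *		struct cpu_model *ci = curcpu_model;
 *		int s = ci->ci_ilevel;
 *
 *		// Remember the old SPL only for the outermost spin mutex.
 *		if (ci->ci_mtx_count-- == 0)
 *			ci->ci_mtx_oldspl = s;
 *		// splraiseipl(): never lower the current level.
 *		if (mtx->mtx_ipl > s)
 *			ci->ci_ilevel = mtx->mtx_ipl;
 *		// Try to take the lock byte; spin in the slow path if held.
 *		if (!__sync_bool_compare_and_swap(&mtx->mtx_lock, 0, 1))
 *			mutex_spin_retry(mtx);
 *	}
 */
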
/*
 * void mutex_spin_exit(kmutex_t *mtx);
 *
 * Release a spin mutex and post a store fence.
 */
	ALIGN64

STUB(mutex_spin_exit, mutex_vector_exit)	/* 0x0100, 50 bytes */
#if defined(DIAGNOSTIC)
	movl	$0x0001, %eax			/* new + expected value */
	cmpxchgb %ah, MTX_LOCK(%edx)
	jnz,pn	_C_LABEL(mutex_vector_exit)
#elif defined(MULTIPROCESSOR)
	movb	$0x00, MTX_LOCK(%edx)
#endif
	movl	CPUVAR(SELF150), %eax
	movl	(CPU_INFO_MTX_OLDSPL-0x150)(%eax), %ecx
	incl	(CPU_INFO_MTX_COUNT-0x150)(%eax)
	jnz	1f
	cmpl	(CPU_INFO_ILEVEL-0x150)(%eax), %ecx
	movl	%ecx, 4(%esp)
	jae	1f
	movl	(CPU_INFO_IUNMASK-0x150)(%eax,%ecx,4), %edx
	cli
	testl	(CPU_INFO_IPENDING-0x150)(%eax), %edx
	jnz	_C_LABEL(Xspllower)		/* does sti */
	movl	%ecx, (CPU_INFO_ILEVEL-0x150)(%eax)
	sti
1:	ret

	ALIGN64
LABEL(mutex_spin_exit_end)

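/*
 * For illustration only: a hedged C model of mutex_spin_exit() above,
 * continuing the stand-in definitions sketched after mutex_spin_enter().
 * splx_model() stands in for the splx()/Xspllower() work of lowering the
 * SPL and running any interrupts held off while the spin mutex was held.
 *
 *	void splx_model(int);	// lower SPL, process pending interrupts
 *
 *	void
 *	mutex_spin_model_exit(struct kmutex_spin_model *mtx)
 *	{
 *		struct cpu_model *ci = curcpu_model;
 *
 *		mtx->mtx_lock = 0;	// release the lock byte
 *		// Restore the saved SPL only when the outermost spin mutex
 *		// is released; nested releases just adjust the count.
 *		if (++ci->ci_mtx_count == 0 &&
 *		    ci->ci_mtx_oldspl < ci->ci_ilevel)
 *			splx_model(ci->ci_mtx_oldspl);
 *	}
 */
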
#if !defined(I386_CPU) && defined(I686_CPU) && !defined(DIAGNOSTIC)

/*
 * Patch for i686 CPUs where cli/sti is prohibitively expensive.
 * Must be the same size as mutex_spin_exit().
 */
	ALIGN64

ENTRY(i686_mutex_spin_exit)			/* 64 bytes */
	mov	4(%esp), %edx
	xorl	%eax, %eax
	pushl	%edi
	fs
	movl	(CPU_INFO_SELF150)(%eax), %edi	/* now splx() */
	pushl	%ebx
	movl	(CPU_INFO_MTX_OLDSPL-0x150)(%edi), %ecx
	incl	(CPU_INFO_MTX_COUNT-0x150)(%edi)
	movb	%al, MTX_LOCK(%edx)		/* zero */
	movl	(CPU_INFO_ILEVEL-0x150)(%edi), %edx
	jnz	1f
	cmpl	%edx, %ecx			/* new level is lower? */
	jae,pn	1f
0:
	movl	(CPU_INFO_IPENDING-0x150)(%edi), %eax
	testl	%eax, (CPU_INFO_IUNMASK-0x150)(%edi,%ecx,4)
	movl	%eax, %ebx
	/*
	 * On a P4 this jump is cheaper than patching in junk using
	 * cmovnz.  Is cmpxchg expensive if it fails?
	 */
	jnz,pn	2f
	cmpxchg8b (CPU_INFO_ISTATE-0x150)(%edi)	/* swap in new ilevel */
	jnz,pn	0b
1:
	popl	%ebx
	popl	%edi
	ret
2:
	popl	%ebx
	popl	%edi
	movl	%ecx, 4(%esp)
LABEL(i686_mutex_spin_exit_patch)
	jmp	_C_LABEL(Xspllower)
	ALIGN64
LABEL(i686_mutex_spin_exit_end)

#endif	/* !defined(I386_CPU) && defined(I686_CPU) && !defined(DIAGNOSTIC) */

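/*
 * For illustration only: a hedged C model of the cmpxchg8b trick above.
 * The pending-interrupt bits and the interrupt priority level are assumed
 * to live in one aligned 64-bit "istate" word, so a single 64-bit
 * compare-and-swap can lower the level only while no interrupt that would
 * be unmasked at the new level is pending; that avoids the cli/sti pair
 * entirely.  All names and the field layout here are stand-ins.
 *
 *	#include <stdint.h>
 *
 *	struct istate_model {			// must be 8-byte aligned
 *		volatile uint32_t ipending;	// pending interrupt bits
 *		volatile uint32_t ilevel;	// current priority level
 *	};
 *	extern uint32_t iunmask_model[];	// per-level unmask bits
 *	void Xspllower_model(int);		// slow path, runs pending ints
 *
 *	void
 *	splx_lower_model(struct istate_model *is, uint32_t newlevel)
 *	{
 *		uint64_t oldst, newst;
 *		uint32_t pending;
 *
 *		do {
 *			pending = is->ipending;
 *			if (pending & iunmask_model[newlevel]) {
 *				// Something is runnable at the new level:
 *				// fall back to the full spllower path.
 *				Xspllower_model(newlevel);
 *				return;
 *			}
 *			oldst = ((uint64_t)is->ilevel << 32) | pending;
 *			newst = ((uint64_t)newlevel << 32) | pending;
 *			// Retry if ipending changed between the test and
 *			// the 64-bit CAS (this is what jnz,pn 0b does above).
 *		} while (!__sync_bool_compare_and_swap(
 *		    (volatile uint64_t *)is, oldst, newst));
 *	}
 */
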
#else	/* !__XEN__ */

/* For now; strong alias not working for some reason. */
NENTRY(mutex_spin_enter)
	jmp	_C_LABEL(mutex_vector_enter)

NENTRY(mutex_spin_exit)
	jmp	_C_LABEL(mutex_vector_exit)

#endif	/* !__XEN__ */

#endif	/* !LOCKDEBUG */

/*
 * int _lock_cas(uintptr_t *val, uintptr_t old, uintptr_t new);
 *
 * Perform an atomic compare-and-set operation.
 */
	ALIGN64

STUB(_lock_cas, _80386_lock_cas)		/* 32 bytes */
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	movl	$0, %eax
	setz	%al
	ret

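/*
 * For illustration only: a hedged C statement of the contract implemented
 * above.  _lock_cas() returns nonzero iff *val was equal to old and has
 * been replaced by new; otherwise *val is left unchanged.  The model name
 * is hypothetical and uses a GCC builtin rather than LOCK cmpxchg.
 *
 *	#include <stdint.h>
 *
 *	int
 *	lock_cas_model(volatile uintptr_t *val, uintptr_t old, uintptr_t new)
 *	{
 *		return __sync_bool_compare_and_swap(val, old, new);
 *	}
 */
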
#ifdef I386_CPU
/*
 * Since we can't do compare-and-exchange atomically on an 80386, we must
 * disable interrupts so that the sequence cannot be preempted.  On the
 * i386 this is cheap to do.  For other architectures a restartable
 * sequence is usually a better option.
 */
_80386_lock_cas:
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	cli
	cmpl	%eax, (%edx)
	jne	1f
	movl	%ecx, (%edx)
	movb	$1, %al
	sti
	ret

1:	sti
	xorl	%eax, %eax
	ret
#endif	/* I386_CPU */

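/*
 * For illustration only: a hedged C model of the interrupt-disabling
 * fallback above.  intr_disable_model()/intr_restore_model() are
 * hypothetical stand-ins for whatever the port uses to mask and unmask
 * interrupts (cli/sti here); on a uniprocessor 80386 that is enough to
 * make the compare-and-set appear atomic.
 *
 *	#include <stdint.h>
 *
 *	unsigned long intr_disable_model(void);	// returns previous state
 *	void intr_restore_model(unsigned long);
 *
 *	int
 *	lock_cas_80386_model(volatile uintptr_t *val, uintptr_t old,
 *	    uintptr_t new)
 *	{
 *		unsigned long s = intr_disable_model();
 *		int rv = 0;
 *
 *		if (*val == old) {
 *			*val = new;	// store only if the comparison held
 *			rv = 1;
 *		}
 *		intr_restore_model(s);
 *		return rv;
 *	}
 */
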
/*
 * Memory barrier operations, may be patched at runtime.
 */
	.align	8

NENTRY(mb_read)
	lock
	addl	$0, 0(%esp)
	ret
END(mb_read_end, 8)

NENTRY(mb_write)
	/* Nothing at the moment. */
	ret
END(mb_write_end, 8)

NENTRY(mb_memory)
	lock
	addl	$0, 0(%esp)
	ret
END(mb_memory_end, 8)

#ifdef I686_CPU
NENTRY(sse2_mb_read)
	lfence
	ret
END(sse2_mb_read_end, 8)

NENTRY(sse2_mb_memory)
	mfence
	ret
END(sse2_mb_memory_end, 8)
#endif	/* I686_CPU */

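/*
 * For illustration only: a hedged sketch of how the barriers above are
 * meant to be used.  The `lock addl $0, 0(%esp)` form is a full fence on
 * any IA-32 CPU; where SSE2 is available the kernel may patch in the
 * cheaper lfence/mfence bodies at runtime.  The mb_read()/mb_write()
 * names are the real stubs, but flag_model/data_model are made-up
 * variables used only for this producer/consumer example.
 *
 *	extern volatile int data_model, flag_model;
 *	void mb_read(void);
 *	void mb_write(void);
 *
 *	void
 *	producer_model(void)
 *	{
 *		data_model = 1;
 *		mb_write();	// order the data store before the flag store
 *		flag_model = 1;
 *	}
 *
 *	int
 *	consumer_model(void)
 *	{
 *		if (flag_model == 0)
 *			return 0;
 *		mb_read();	// order the flag load before the data load
 *		return data_model;
 *	}
 */
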
/*
 * Make sure the padding after the ret is encoded as NOPs by gas,
 * otherwise it could stall newer processors.
 */

NENTRY(x86_mb_nop)
	ret
END(x86_mb_nop_end, 8)