636 __ move(T8, T1); // original count in T1 |
636 __ move(T8, T1); // original count in T1 |
637 __ daddi(AT, T1, -3); |
637 __ daddi(AT, T1, -3); |
638 __ blez(AT, l_4); |
638 __ blez(AT, l_4); |
639 __ delayed()->nop(); |
639 __ delayed()->nop(); |
640 if (!aligned) { |
640 if (!aligned) { |
|
641 // 2016/5/8 Jin: only when src and dest have the same alignment can we do lw/sw |
|
642 __ andi(AT, T3, 3); |
|
643 __ andi(T9, T0, 3); |
|
644 __ bne(AT, T9, l_5); |
|
645 __ delayed()->nop(); |
|
646 |
641 // align source address at dword address boundary |
647 // align source address at dword address boundary |
642 __ move(T1, 4); |
648 __ move(T1, 4); |
643 __ sub(T1, T1, T3); |
649 __ sub(T1, T1, T3); |
644 __ andi(T1, T1, 3); |
650 __ andi(T1, T1, 3); |
645 __ beq(T1, R0, l_1); |
651 __ beq(T1, R0, l_1); |
709 // dwords or qwords that span cache line boundaries will still be loaded |
715 // dwords or qwords that span cache line boundaries will still be loaded |
710 // and stored atomically. |
716 // and stored atomically. |
711 // |
717 // |
712 address generate_conjoint_byte_copy(bool aligned, const char *name) { |
718 address generate_conjoint_byte_copy(bool aligned, const char *name) { |
713 Label l_1, l_2, l_3, l_4, l_5; |
719 Label l_1, l_2, l_3, l_4, l_5; |
|
720 Label l_unaligned, l_aligned; |
714 StubCodeMark mark(this, "StubRoutines", name); |
721 StubCodeMark mark(this, "StubRoutines", name); |
715 __ align(CodeEntryAlignment); |
722 __ align(CodeEntryAlignment); |
716 address start = __ pc(); |
723 address start = __ pc(); |
717 address nooverlap_target = aligned ? |
724 address nooverlap_target = aligned ? |
718 StubRoutines::arrayof_jbyte_disjoint_arraycopy() : |
725 StubRoutines::arrayof_jbyte_disjoint_arraycopy() : |
723 __ push(T3); |
730 __ push(T3); |
724 __ push(T0); |
731 __ push(T0); |
725 __ push(T1); |
732 __ push(T1); |
726 __ push(T8); |
733 __ push(T8); |
727 |
734 |
728 |
|
729 // copy from high to low |
735 // copy from high to low |
730 __ move(T3, A0); |
736 __ move(T3, A0); |
731 __ move(T0, A1); |
737 __ move(T0, A1); |
732 __ move(T1, A2); |
738 __ move(T1, A2); |
733 __ dadd(AT, T3, T1); |
739 __ dadd(T3, T3, T1); |
734 __ lea(T3, Address(AT, -4)); |
740 __ dadd(T0, T0, T1); |
735 __ dadd(AT, T0, T1); |
741 |
736 __ lea(T0, Address(AT, -4)); |
742 // 2016/5/8 Jin: copy starting unaligned bytes |
|
743 __ bind(l_unaligned); |
|
744 __ beq(T1, R0, l_5); |
|
745 __ delayed()->nop(); |
|
746 |
|
747 __ andi(AT, T3, 3); |
|
748 __ beq(AT, R0, l_aligned); |
|
749 __ delayed()->nop(); |
|
750 __ lb(AT, T3, -1); |
|
751 __ sb(AT, T0, -1); |
|
752 __ daddi(AT, T1, -1); |
|
753 __ daddi(AT, T3, -1); |
|
754 __ daddi(AT, T0, -1); |
|
755 __ b(l_unaligned); |
|
756 __ delayed()->nop(); |
|
757 |
|
758 // now T0, T3 point to 4-byte aligned high-ends |
|
759 // T1 contains the count of bytes that have not been copied yet. |
|
760 __ bind(l_aligned); |
|
761 |
737 __ move(T8, T1); |
762 __ move(T8, T1); |
738 __ daddi(AT, T1, -3); |
763 __ daddi(AT, T1, -3); |
739 __ blez(AT, l_3); |
764 __ blez(AT, l_3); |
740 __ delayed()->nop(); |
765 __ delayed()->nop(); |
|
766 |
|
767 __ andi(T8, T8, 3); |
|
768 __ lea(T3, Address(T3, -4)); |
|
769 __ lea(T0, Address(T0, -4)); |
|
770 |
741 __ dsrl(T1, T1, 2); |
771 __ dsrl(T1, T1, 2); |
742 __ align(16); |
772 __ align(16); |
743 __ bind(l_1); |
773 __ bind(l_1); |
744 __ lw(AT, T3, 0); |
774 __ lw(AT, T3, 0); |
745 __ sw(AT, T0, 0); |
775 __ sw(AT, T0, 0); |