[x264-devel] [Git][videolan/x264][master] Update gas-preprocessor.pl to the latest upstream version
Anton Mitrofanov (@BugMaster)
gitlab at videolan.org
Wed May 21 19:02:13 UTC 2025
Anton Mitrofanov pushed to branch master at VideoLAN / x264
Commits:
85b5ccea by Martin Storsjö at 2025-05-21T18:48:51+00:00
Update gas-preprocessor.pl to the latest upstream version
This updates to the version from commit
7380ac24e1cd23a5e6d76c6af083d8fc5ab9e943 from
https://github.com/ffmpeg/gas-preprocessor.
The previous version was from 2017, from commit
ee12830747ff0b97ec6b41f4263fec63d1711365.
This includes support for assembling aarch64 code with
register ranges, such as {v0.8b-v3.8b} with armasm64 (rewritten
into an explicit list of registers), and fixes deprecated Perl
syntax broken by more modern versions of Perl.
- - - - -
1 changed file:
- tools/gas-preprocessor.pl
Changes:
=====================================
tools/gas-preprocessor.pl
=====================================
@@ -16,6 +16,7 @@ my %canonical_arch = ("aarch64" => "aarch64", "arm64" => "aarch64",
my %comments = ("aarch64" => '//',
"arm" => '@',
+ "ppc" => '#',
"powerpc" => '#');
my @gcc_cmd;
@@ -27,6 +28,7 @@ my $as_type = "apple-gas";
my $fix_unreq = $^O eq "darwin";
my $force_thumb = 0;
+my $verbose = 0;
my $arm_cond_codes = "eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le|al|hs|lo";
@@ -34,7 +36,7 @@ my $usage_str = "
$0\n
Gas-preprocessor.pl converts assembler files using modern GNU as syntax for
Apple's ancient gas version or clang's incompatible integrated assembler. The
-conversion is regularly tested for Libav, x264 and vlc. Other projects might
+conversion is regularly tested for FFmpeg, Libav, x264 and vlc. Other projects might
use different features which are not correctly handled.
Options for this program needs to be separated with ' -- ' from the assembler
@@ -48,6 +50,7 @@ command. Following options are currently supported:
-force-thumb - assemble as thumb regardless of the input source
(note, this is incomplete and only works for sources
it explicitly was tested with)
+ -verbose - print executed commands
";
sub usage() {
@@ -61,12 +64,14 @@ while (@ARGV) {
$fix_unreq = $1 ne "no-";
} elsif ($opt eq "-force-thumb") {
$force_thumb = 1;
+ } elsif ($opt eq "-verbose") {
+ $verbose = 1;
} elsif ($opt eq "-arch") {
$arch = shift;
die "unknown arch: '$arch'\n" if not exists $canonical_arch{$arch};
} elsif ($opt eq "-as-type") {
$as_type = shift;
- die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang)|armasm)$/;
+ die "unknown as type: '$as_type'\n" if $as_type !~ /^((apple-)?(gas|clang|llvm_gcc)|armasm)$/;
} elsif ($opt eq "-help") {
usage();
exit 0;
@@ -90,6 +95,7 @@ if (grep /\.c$/, @gcc_cmd) {
# pass -v/--version along, used during probing. Matching '-v' might have
# uninteded results but it doesn't matter much if gas-preprocessor or
# the compiler fails.
+ print STDERR join(" ", @gcc_cmd)."\n" if $verbose;
exec(@gcc_cmd);
} else {
die "Unrecognized input filetype";
@@ -97,14 +103,7 @@ if (grep /\.c$/, @gcc_cmd) {
if ($as_type eq "armasm") {
$preprocess_c_cmd[0] = "cpp";
- push(@preprocess_c_cmd, "-undef");
- # Normally a preprocessor for windows would predefine _WIN32,
- # but we're using any generic system-agnostic preprocessor "cpp"
- # with -undef (to avoid getting predefined variables from the host
- # system in cross compilation cases), so manually define it here.
- push(@preprocess_c_cmd, "-D_WIN32");
-
- @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
+
# Remove -ignore XX parameter pairs from preprocess_c_cmd
my $index = 1;
while ($index < $#preprocess_c_cmd) {
@@ -115,9 +114,23 @@ if ($as_type eq "armasm") {
$index++;
}
if (grep /^-MM$/, @preprocess_c_cmd) {
+ push(@preprocess_c_cmd, "-D_WIN32");
+ # Normally a preprocessor for windows would predefine _WIN32,
+ # but we're using any generic system-agnostic preprocessor "cpp"
+ # with -undef (to avoid getting predefined variables from the host
+ # system in cross compilation cases), so manually define it here.
+ # We only use this generic preprocessor for generating dependencies,
+ # if the build system runs preprocessing with -M/-MM without -MF.
+ push(@preprocess_c_cmd, "-undef");
+ @preprocess_c_cmd = grep ! /^-nologo$/, @preprocess_c_cmd;
+ print STDERR join(" ", @preprocess_c_cmd)."\n" if $verbose;
system(@preprocess_c_cmd) == 0 or die "Error running preprocessor";
exit 0;
}
+
+ # If not preprocessing for getting a dependency list, use cl.exe
+ # instead.
+ $preprocess_c_cmd[0] = "cl.exe";
}
# if compiling, avoid creating an output file named '-.o'
@@ -132,22 +145,26 @@ if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
}
}
}
-# replace only the '-o' argument with '-', avoids rewriting the make dependency
-# target specified with -MT to '-'
+# Remove the -o argument; if omitted, we by default preprocess to stdout.
my $index = 1;
while ($index < $#preprocess_c_cmd) {
if ($preprocess_c_cmd[$index] eq "-o") {
- $index++;
- $preprocess_c_cmd[$index] = "-";
+ splice(@preprocess_c_cmd, $index, 2);
+ last;
}
$index++;
}
+@preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
+
my $tempfile;
if ($as_type ne "armasm") {
@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
+
+ # Filter out options that can cause warnings due to unused arguments,
+ # Clang warns about unused -D parameters when invoked with "-x assembler".
+ @gcc_cmd = grep ! /^-D/, @gcc_cmd;
} else {
- @preprocess_c_cmd = grep ! /^-c$/, @preprocess_c_cmd;
@preprocess_c_cmd = grep ! /^-m/, @preprocess_c_cmd;
@preprocess_c_cmd = grep ! /^-G/, @preprocess_c_cmd;
@@ -156,6 +173,10 @@ if ($as_type ne "armasm") {
@preprocess_c_cmd = grep ! /^-fp/, @preprocess_c_cmd;
@preprocess_c_cmd = grep ! /^-EHsc$/, @preprocess_c_cmd;
@preprocess_c_cmd = grep ! /^-O/, @preprocess_c_cmd;
+ @preprocess_c_cmd = grep ! /^-oldit/, @preprocess_c_cmd;
+ @preprocess_c_cmd = grep ! /^-FS/, @preprocess_c_cmd;
+ @preprocess_c_cmd = grep ! /^-w/, @preprocess_c_cmd;
+ @preprocess_c_cmd = grep ! /^-M/, @preprocess_c_cmd;
@gcc_cmd = grep ! /^-G/, @gcc_cmd;
@gcc_cmd = grep ! /^-W/, @gcc_cmd;
@@ -163,6 +184,8 @@ if ($as_type ne "armasm") {
@gcc_cmd = grep ! /^-fp/, @gcc_cmd;
@gcc_cmd = grep ! /^-EHsc$/, @gcc_cmd;
@gcc_cmd = grep ! /^-O/, @gcc_cmd;
+ @gcc_cmd = grep ! /^-FS/, @gcc_cmd;
+ @gcc_cmd = grep ! /^-w/, @gcc_cmd;
my @outfiles = grep /\.(o|obj)$/, @gcc_cmd;
$tempfile = $outfiles[0].".asm";
@@ -195,6 +218,8 @@ if (!$arch) {
# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
$arch = qx/arch/ if (!$arch);
+# remove any whitespace, e.g. arch command might print a newline
+$arch =~ s/\s+//g;
die "Unknown target architecture '$arch'" if not exists $canonical_arch{$arch};
@@ -206,12 +231,14 @@ $comm = ";" if $as_type =~ /armasm/;
my %ppc_spr = (ctr => 9,
vrsave => 256);
+print STDERR join(" ", @preprocess_c_cmd)."\n" if $verbose;
open(INPUT, "-|", @preprocess_c_cmd) || die "Error running preprocessor";
if ($ENV{GASPP_DEBUG}) {
open(ASMFILE, ">&STDOUT");
} else {
if ($as_type ne "armasm") {
+ print STDERR join(" ", @gcc_cmd)."\n" if $verbose;
open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
} else {
open(ASMFILE, ">", $tempfile);
@@ -265,6 +292,9 @@ my %aarch64_req_alias;
if ($force_thumb) {
parse_line(".thumb\n");
}
+if ($as_type eq "armasm") {
+ parse_line(".text\n");
+}
# pass 1: parse .macro
# note that the handling of arguments is probably overly permissive vs. gas
@@ -281,12 +311,11 @@ while (<INPUT>) {
s/\r$//;
foreach my $subline (split(";", $_)) {
- # Add newlines at the end of lines that don't already have one
chomp $subline;
- $subline .= "\n";
- parse_line($subline);
+ parse_line_continued($subline);
}
}
+parse_line_continued("");
sub eval_expr {
my $expr = $_[0];
@@ -310,7 +339,7 @@ sub handle_if {
$expr =~ s/\s//g;
$result ^= $expr eq "";
} elsif ($type eq "c") {
- if ($expr =~ /(.*)\s*,\s*(.*)/) {
+ if ($expr =~ /(\S*)\s*,\s*(\S*)/) {
$result ^= $1 eq $2;
} else {
die "argument to .ifc not recognized";
@@ -323,7 +352,7 @@ sub handle_if {
$result = eval_expr($expr) < 0;
} else {
chomp($line);
- die "unhandled .if varient. \"$line\"";
+ die "unhandled .if variant. \"$line\"";
}
push (@ifstack, $result);
return 1;
@@ -369,18 +398,32 @@ sub parse_if_line {
return 0;
}
+my $last_line = "";
+sub parse_line_continued {
+ my $line = $_[0];
+ $last_line .= $line;
+ if ($last_line =~ /\\$/) {
+ $last_line =~ s/\\$//;
+ } else {
+ # Add newlines at the end of lines after concatenation.
+ $last_line .= "\n";
+ parse_line($last_line);
+ $last_line = "";
+ }
+}
+
sub parse_line {
my $line = $_[0];
return if (parse_if_line($line));
if (scalar(@rept_lines) == 0) {
- if (/\.macro/) {
+ if ($line =~ /\.macro/) {
$macro_level++;
if ($macro_level > 1 && !$current_macro) {
die "nested macros but we don't have master macro";
}
- } elsif (/\.endm/) {
+ } elsif ($line =~ /\.endm/) {
$macro_level--;
if ($macro_level < 0) {
die "unmatched .endm";
@@ -701,7 +744,10 @@ sub handle_serialized_line {
}
# mach-o local symbol names start with L (no dot)
- $line =~ s/(?<!\w)\.(L\w+)/$1/g;
+ # armasm also can't handle labels that start with a dot.
+ if ($as_type =~ /apple-/ or $as_type eq "armasm") {
+ $line =~ s/(?<!\w)\.(L\w+)/$1/g;
+ }
# recycle the '.func' directive for '.thumb_func'
if ($thumb and $as_type =~ /^apple-/) {
@@ -767,7 +813,7 @@ sub handle_serialized_line {
# This line seems to possibly have a neon instruction
foreach (keys %neon_alias_reg) {
my $alias = $_;
- # Require the register alias to match as an invididual word, not as a substring
+ # Require the register alias to match as an individual word, not as a substring
# of a larger word-token.
if ($line =~ /\b$alias\b/) {
$line =~ s/\b$alias\b/$neon_alias_reg{$alias}/g;
@@ -860,7 +906,7 @@ sub handle_serialized_line {
my $name = "temp_label_$temp_label_next";
$temp_label_next++;
# The matching regexp above removes the label from the start of
- # the line (which might contain an instruction as well), readd
+ # the line (which might contain an instruction as well), re-add
# it on a separate line above it.
$line = "$name:\n" . $line;
$last_temp_labels{$num} = $name;
@@ -879,7 +925,7 @@ sub handle_serialized_line {
# Check branch instructions
- if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?(..)?(\.w)?)\s+(\w+)/) {
+ if ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(bl?x?\.?([^\s]{2})?(\.w)?)\s+(\w+)/) {
my $instr = $2;
my $cond = $3;
my $width = $4;
@@ -895,7 +941,7 @@ sub handle_serialized_line {
($arch eq "aarch64" and !is_aarch64_register($target))) {
$call_targets{$target}++;
}
- } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbz)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) {
+ } elsif ($line =~ /(?:^|\n)\s*(\w+\s*:\s*)?(cbn?z|adr|tbn?z)\s+(\w+)\s*,(\s*#\d+\s*,)?\s*(\w+)/) {
my $instr = $2;
my $reg = $3;
my $bit = $4;
@@ -908,12 +954,12 @@ sub handle_serialized_line {
}
# Convert tbz with a wX register into an xX register,
# due to armasm64 bugs/limitations.
- if ($instr eq "tbz" and $reg =~ /w\d+/) {
+ if (($instr eq "tbz" or $instr eq "tbnz") and $reg =~ /w\d+/) {
my $xreg = $reg;
$xreg =~ s/w/x/;
$line =~ s/\b$reg\b/$xreg/;
}
- } elsif ($line =~ /^\s*.h?word.*\b\d+[bf]\b/) {
+ } elsif ($line =~ /^\s*.([hxd]?word|quad).*\b\d+[bf]\b/) {
while ($line =~ /\b(\d+)([bf])\b/g) {
$line = handle_local_label($line, $1, $2);
}
@@ -922,7 +968,7 @@ sub handle_serialized_line {
# ALIGN in armasm syntax is the actual number of bytes
if ($line =~ /\.(?:p2)?align\s+(\d+)/) {
my $align = 1 << $1;
- $line =~ s/\.(?:p2)?align\s(\d+)/ALIGN $align/;
+ $line =~ s/\.(?:p2)?align\s+(\d+)/ALIGN $align/;
}
# Convert gas style [r0, :128] into armasm [r0@128] alignment specification
$line =~ s/\[([^\[,]+),?\s*:(\d+)\]/[$1\@$2]/g;
@@ -973,8 +1019,8 @@ sub handle_serialized_line {
my $reg = $1;
my $sym = $2;
my $offset = eval_expr($3);
- if ($offset < 0) {
- # armasm64 is buggy with ldr x0, =sym+offset where the
+ if ($offset < 0 and $ENV{GASPP_ARMASM64_SKIP_NEG_OFFSET}) {
+ # armasm64 in VS < 15.6 is buggy with ldr x0, =sym+offset where the
# offset is a negative value; it does write a negative
# offset into the literal pool as it should, but the
# negative offset only covers the lower 32 bit of the 64
@@ -1005,7 +1051,7 @@ sub handle_serialized_line {
# Convert e.g. "add x0, x0, w0, uxtw" into "add x0, x0, w0, uxtw #0",
# or "ldr x0, [x0, w0, uxtw]" into "ldr x0, [x0, w0, uxtw #0]".
- $line =~ s/(uxtw|sxtw)(\s*\]?\s*)$/\1 #0\2/i;
+ $line =~ s/(uxt[whb]|sxt[whb])(\s*\]?\s*)$/\1 #0\2/i;
# Convert "mov x0, v0.d[0]" into "umov x0, v0.d[0]"
$line =~ s/\bmov\s+[xw]\d+\s*,\s*v\d+\.[ds]/u$&/i;
@@ -1020,10 +1066,13 @@ sub handle_serialized_line {
# Convert "cset w0, lo" into "csetlo w0"
$line =~ s/(cset)\s+([xw]\w+)\s*,\s*($arm_cond_codes)/\1\3 \2/;
- # Strip out prfum; armasm64 fails to assemble any
- # variant/combination of prfum tested so far, but it can be
- # left out without any
- $line =~ s/prfum.*\]//;
+ if ($ENV{GASPP_ARMASM64_SKIP_PRFUM}) {
+ # Strip out prfum; armasm64 (VS < 15.5) fails to assemble any
+ # variant/combination of prfum tested so far, but since it is
+ # a prefetch instruction it can be skipped without changing
+ # results.
+ $line =~ s/prfum.*\]//;
+ }
# Convert "ldrb w0, [x0, #-1]" into "ldurb w0, [x0, #-1]".
# Don't do this for forms with writeback though.
@@ -1041,7 +1090,7 @@ sub handle_serialized_line {
if ($ENV{GASPP_ARMASM64_INVERT_SCALE}) {
# Instructions like fcvtzs and scvtf store the scale value
# inverted in the opcode (stored as 64 - scale), but armasm64
- # in early versions stores it as-is. Thus convert from
+ # in VS < 15.5 stores it as-is. Thus convert from
# "fcvtzs w0, s0, #8" into "fcvtzs w0, s0, #56".
if ($line =~ /(?:fcvtzs|scvtf)\s+(\w+)\s*,\s*(\w+)\s*,\s*#(\d+)/) {
my $scale = $3;
@@ -1049,6 +1098,24 @@ sub handle_serialized_line {
$line =~ s/#$scale/#$inverted_scale/;
}
}
+
+ # Convert "ld1 {v0.4h-v3.4h}" into "ld1 {v0.4h,v1.4h,v2.4h,v3.4h}"
+ if ($line =~ /(\{\s*v(\d+)\.(\d+[bhsdBHSD])\s*-\s*v(\d+)\.(\d+[bhsdBHSD])\s*\})/) {
+ my $regspec = $1;
+ my $reg1 = $2;
+ my $layout1 = $3;
+ my $reg2 = $4;
+ my $layout2 = $5;
+ if ($layout1 eq $layout2) {
+ my $new_regspec = "{";
+ foreach my $i ($reg1 .. $reg2) {
+ $new_regspec .= "," if ($i > $reg1);
+ $new_regspec .= "v$i.$layout1";
+ }
+ $new_regspec .= "}";
+ $line =~ s/$regspec/$new_regspec/;
+ }
+ }
}
# armasm is unable to parse &0x - add spacing
$line =~ s/&0x/& 0x/g;
@@ -1103,6 +1170,8 @@ sub handle_serialized_line {
$line =~ s/\.syntax/$comm$&/x if $as_type =~ /armasm/;
$line =~ s/\.hword/.short/x;
+ $line =~ s/\.xword/.quad/x;
+ $line =~ s/\.dword/.quad/x;
if ($as_type =~ /^apple-/) {
# the syntax for these is a little different
@@ -1117,6 +1186,7 @@ sub handle_serialized_line {
}
if ($as_type eq "armasm") {
$line =~ s/\.global/EXPORT/x;
+ $line =~ s/\.extern/IMPORT/x;
$line =~ s/\.int/dcd/x;
$line =~ s/\.long/dcd/x;
$line =~ s/\.float/dcfs/x;
@@ -1130,13 +1200,17 @@ sub handle_serialized_line {
$line =~ s/\.arm/ARM/x;
# The alignment in AREA is the power of two, just as .align in gas
$line =~ s/\.text/AREA |.text|, CODE, READONLY, ALIGN=4, CODEALIGN/;
- $line =~ s/(\s*)(.*)\.rodata/$1AREA |.rodata|, DATA, READONLY, ALIGN=5/;
+ $line =~ s/(\s*)(.*)\.ro?data(\s*,\s*"\w+")?/$1AREA |.rdata|, DATA, READONLY, ALIGN=5/;
$line =~ s/\.data/AREA |.data|, DATA, ALIGN=5/;
}
if ($as_type eq "armasm" and $arch eq "arm") {
$line =~ s/fmxr/vmsr/;
$line =~ s/fmrx/vmrs/;
$line =~ s/fadds/vadd.f32/;
+ # Armasm in VS 2019 16.3 errors out on "it" instructions. But
+ # armasm implicitly adds the necessary it instructions anyway, so we
+ # can just filter them out.
+ $line =~ s/^\s*it[te]*\s+/$comm$&/;
}
if ($as_type eq "armasm" and $arch eq "aarch64") {
# Convert "b.eq" into "beq"
@@ -1170,6 +1244,7 @@ if ($as_type ne "armasm") {
close(INPUT) or exit 1;
close(ASMFILE) or exit 1;
if ($as_type eq "armasm" and ! defined $ENV{GASPP_DEBUG}) {
+ print STDERR join(" ", @gcc_cmd)."\n" if $verbose;
system(@gcc_cmd) == 0 or die "Error running assembler";
}
View it on GitLab: https://code.videolan.org/videolan/x264/-/commit/85b5ccea1fab98841d79455e344c797c5ffc3212
--
View it on GitLab: https://code.videolan.org/videolan/x264/-/commit/85b5ccea1fab98841d79455e344c797c5ffc3212
You're receiving this email because of your account on code.videolan.org.
VideoLAN code repository instance
More information about the x264-devel
mailing list