Skip to content

Commit

Permalink
Only keep semantic fields in Java, i.e. skip location fields
Browse files Browse the repository at this point in the history
* Add $YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS to control where to serialize location fields at templating time,
  this way there is no overhead for either case and nothing to check at runtime.
* Add a byte in the header to indicate whether location fields are included as expected.
* Fixes #807
* Simplify the build-java CI job now that the FFI backend is available so JRuby can serialize.
* Support keeping some location fields which are still needed until there is a replacement
  • Loading branch information
eregon committed Sep 16, 2023
1 parent 0d8d1be commit fc5cf2d
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 66 deletions.
11 changes: 1 addition & 10 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -116,22 +116,13 @@ jobs:
JRUBY_OPTS: "--dev"
steps:
- uses: actions/checkout@v3
- name: Set up CRuby
uses: ruby/setup-ruby@v1
with:
ruby-version: 3.2
bundler-cache: true
- name: Serialize fixtures on CRuby
run: bundle exec rake compile test:serialize_fixtures
- name: Set up JRuby
uses: ruby/setup-ruby@v1
with:
ruby-version: jruby
bundler-cache: true
- name: Compile generated Java files
run: bundle exec rake compile
- name: Run Java Loader test
run: JRUBY_OPTS="-J-ea" bundle exec rake test:java_loader
run: YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS=1 JRUBY_OPTS="-J-ea" bundle exec rake test:java_loader

lex-ruby:
runs-on: ubuntu-latest
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
/doc/
/pkg/
/spec/reports/
/test/yarp/serialized/
/top-100-gems/
/tmp/
/vendor/bundle
Expand Down
4 changes: 4 additions & 0 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,7 @@ nodes:
type: location
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
- name: closing_loc
type: location
- name: unescaped
Expand Down Expand Up @@ -2091,6 +2092,7 @@ nodes:
type: location
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
- name: closing_loc
type: location
- name: unescaped
Expand Down Expand Up @@ -2284,8 +2286,10 @@ nodes:
kind: StringFlags
- name: opening_loc
type: location?
semantic_field: true # https://github.com/ruby/yarp/issues/1452
- name: content_loc
type: location
semantic_field: true # https://github.com/ruby/yarp/issues/1452
- name: closing_loc
type: location?
- name: unescaped
Expand Down
1 change: 1 addition & 0 deletions docs/serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ The header is structured like the following table:
| `1` | major version number |
| `1` | minor version number |
| `1` | patch version number |
| `1` | 1 indicates only semantics fields were serialized, 0 indicates all fields were serialized (including location fields) |
| string | the encoding name |
| varint | number of comments |
| comment* | comments |
Expand Down
21 changes: 10 additions & 11 deletions lib/yarp/ffi.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,14 @@ def self.with(filepath, &block)
end
end
end

def self.dump_internal(source, source_size, filepath)
YPBuffer.with do |buffer|
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
yp_parse_serialize(source, source_size, buffer.pointer, metadata)
buffer.read
end
end
end

# Mark the LibRubyParser module as private as it should only be called through
Expand All @@ -175,24 +183,15 @@ def self.with(filepath, &block)
# The version constant is set by reading the result of calling yp_version.
VERSION = LibRubyParser.yp_version.read_string

def self.dump_internal(source, source_size, filepath)
LibRubyParser::YPBuffer.with do |buffer|
metadata = [filepath.bytesize, filepath.b, 0].pack("LA*L") if filepath
LibRubyParser.yp_parse_serialize(source, source_size, buffer.pointer, metadata)
buffer.read
end
end
private_class_method :dump_internal

# Mirror the YARP.dump API by using the serialization API.
def self.dump(code, filepath = nil)
dump_internal(code, code.bytesize, filepath)
LibRubyParser.dump_internal(code, code.bytesize, filepath)
end

# Mirror the YARP.dump_file API by using the serialization API.
def self.dump_file(filepath)
LibRubyParser::YPString.with(filepath) do |string|
dump_internal(string.source, string.length, filepath)
LibRubyParser.dump_internal(string.source, string.length, filepath)
end
end

Expand Down
35 changes: 15 additions & 20 deletions rakelib/serialization.rake
Original file line number Diff line number Diff line change
@@ -1,39 +1,34 @@
# frozen_string_literal: true

fixtures = File.expand_path("../test/yarp/fixtures", __dir__)
serialized_dir = File.expand_path("../serialized", fixtures)
task "test:java_loader" do
# Recompile with YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS=1
# Due to some JRuby bug this does not get propagated to the compile task, so require the caller to set the env var
# ENV["YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS"] = "1"
raise "this task requires $SERIALIZE_ONLY_SEMANTICS_FIELDS to be set" unless ENV["YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS"]

Rake::Task["clobber"].invoke
Rake::Task["test:java_loader:internal"].invoke
end

task "test:java_loader:internal" => :compile do
fixtures = File.expand_path("../test/yarp/fixtures", __dir__)

desc "Serialize test fixtures and save it to .serialized files"
task "test:serialize_fixtures" do
$:.unshift(File.expand_path("../lib", __dir__))
require "yarp"
raise "this task requires the FFI backend" unless YARP::BACKEND == :FFI
require "fileutils"

Dir["**/*.txt", base: fixtures].each do |relative|
path = "#{fixtures}/#{relative}"
serialized_path = "#{serialized_dir}/#{relative}"

serialized = YARP.dump_file(path)
FileUtils.mkdir_p(File.dirname(serialized_path))
File.write(serialized_path, serialized)
end
end

task "test:java_loader" do
require 'java'
require_relative '../tmp/yarp.jar'
java_import 'org.yarp.Nodes$Source'

Dir["**/*.txt", base: fixtures].each do |relative|
path = "#{fixtures}/#{relative}"
serialized_path = "#{serialized_dir}/#{relative}"
serialized = File.binread(serialized_path).unpack('c*')

puts
puts path
serialized = YARP.dump_file(path)
source_bytes = File.binread(path).unpack('c*')
source = Source.new(source_bytes.to_java(:byte))
parse_result = org.yarp.Loader.load(serialized, source)
parse_result = org.yarp.Loader.load(serialized.unpack('c*'), source)
puts parse_result.value
end
end
1 change: 1 addition & 0 deletions src/yarp.c
Original file line number Diff line number Diff line change
Expand Up @@ -14436,6 +14436,7 @@ yp_serialize(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_buffer_append_u8(buffer, YP_VERSION_MAJOR);
yp_buffer_append_u8(buffer, YP_VERSION_MINOR);
yp_buffer_append_u8(buffer, YP_VERSION_PATCH);
yp_buffer_append_u8(buffer, YP_SERIALIZE_ONLY_SEMANTICS_FIELDS ? 1 : 0);

yp_serialize_content(parser, node, buffer);
yp_buffer_append_str(buffer, "\0", 1);
Expand Down
2 changes: 2 additions & 0 deletions templates/include/yarp/ast.h.erb
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,6 @@ typedef enum {
} yp_<%= flag.human %>_t;
<%- end -%>

#define YP_SERIALIZE_ONLY_SEMANTICS_FIELDS <%= YARP::SERIALIZE_ONLY_SEMANTICS_FIELDS %>

#endif // YARP_AST_H
22 changes: 12 additions & 10 deletions templates/java/org/yarp/Loader.java.erb
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,16 @@ public class Loader {
}

private ParseResult load() {
expect((byte) 'Y');
expect((byte) 'A');
expect((byte) 'R');
expect((byte) 'P');
expect((byte) 'Y', "incorrect YARP header");
expect((byte) 'A', "incorrect YARP header");
expect((byte) 'R', "incorrect YARP header");
expect((byte) 'P', "incorrect YARP header");

expect((byte) 0);
expect((byte) 12);
expect((byte) 0);
expect((byte) 0, "YARP version does not match");
expect((byte) 12, "YARP version does not match");
expect((byte) 0, "YARP version does not match");

expect((byte) 1, "Loader.java requires no location fields in the serialized output");

// This loads the name of the encoding. We don't actually do anything
// with it just yet.
Expand Down Expand Up @@ -265,7 +267,7 @@ public class Loader {
case <%= index + 1 %>:
<%-
params = node.needs_serialized_length? ? ["buffer.getInt()"] : []
params.concat node.fields.map { |field|
params.concat node.semantic_fields.map { |field|
case field
when YARP::NodeField then "#{field.java_cast}loadNode()"
when YARP::OptionalNodeField then "#{field.java_cast}loadOptionalNode()"
Expand All @@ -290,10 +292,10 @@ public class Loader {
}
}

private void expect(byte value) {
private void expect(byte value, String error) {
byte b = buffer.get();
if (b != value) {
throw new Error("Expected " + value + " but was " + b + " at position " + buffer.position());
throw new Error("Deserialization error: " + error + " (expected " + value + " but was " + b + " at position " + buffer.position() + ")");
}
}

Expand Down
26 changes: 12 additions & 14 deletions templates/java/org/yarp/Nodes.java.erb
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ public abstract class Nodes {
<%- if node.needs_serialized_length? -%>
public final int serializedLength;
<%- end -%>
<%- node.fields.each do |field| -%>
<%- node.semantic_fields.each do |field| -%>
<%- if field.class.name.include?('Optional') -%>
/** optional (can be null) */
<%- end -%>
Expand All @@ -186,29 +186,27 @@ public abstract class Nodes {

<%-
params = node.needs_serialized_length? ? ["int serializedLength"] : []
params.concat node.fields.map { |field| "#{field.java_type} #{field.name}" }
params.concat node.semantic_fields.map { |field| "#{field.java_type} #{field.name}" }
params.concat ["int startOffset", "int length"]
-%>
public <%=node.name -%>(<%= params.join(", ") %>) {
super(startOffset, length);
<%- if node.needs_serialized_length? -%>
this.serializedLength = serializedLength;
<%- end -%>
<%- node.fields.each do |field| -%>
<%- node.semantic_fields.each do |field| -%>
this.<%= field.name %> = <%= field.name %>;
<%- end -%>
}
<%# methods for flags -%>
<%- node.fields.each do |field| -%>
<%- if field.is_a?(YARP::FlagsField) -%>
<%- node.semantic_fields.grep(YARP::FlagsField).each do |field| -%>
<%- flags.find { |flag| flag.name == field.kind }.tap { |flag| raise "Expected to find #{field.kind}" unless flag }.values.each do |value| -%>

public boolean is<%= value.camelcase %>() {
return <%= field.kind %>.is<%= value.camelcase %>(this.<%= field.name %>);
}
<%- end -%>
<%- end -%>
<%- end -%>
<%# potential override of setNewLineFlag() -%>
<%- if node.newline == false -%>

Expand All @@ -220,7 +218,7 @@ public abstract class Nodes {

@Override
public void setNewLineFlag(Source source, boolean[] newlineMarked) {
<%- field = node.fields.find { |f| f.name == node.newline } or raise node.newline -%>
<%- field = node.semantic_fields.find { |f| f.name == node.newline } or raise node.newline -%>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
this.<%= field.name %>.setNewLineFlag(source, newlineMarked);
Expand All @@ -235,7 +233,7 @@ public abstract class Nodes {
<%- end -%>

public <T> void visitChildNodes(AbstractNodeVisitor<T> visitor) {
<%- node.fields.each do |field| -%>
<%- node.semantic_fields.each do |field| -%>
<%- case field -%>
<%- when YARP::NodeListField -%>
for (Nodes.Node child : this.<%= field.name %>) {
Expand All @@ -252,15 +250,15 @@ public abstract class Nodes {
}

public Node[] childNodes() {
<%- if node.fields.none?(YARP::NodeListField) and node.fields.none?(YARP::NodeKindField) -%>
<%- if node.semantic_fields.none?(YARP::NodeListField) and node.semantic_fields.none?(YARP::NodeKindField) -%>
return EMPTY_ARRAY;
<%- elsif node.fields.one?(YARP::NodeListField) and node.fields.none?(YARP::NodeKindField) -%>
return this.<%= node.fields.grep(YARP::NodeListField).first.name %>;
<%- elsif node.fields.none?(YARP::NodeListField) -%>
return new Node[] { <%= node.fields.grep(YARP::NodeKindField).map { |field| "this.#{field.name}" }.join(', ') %> };
<%- elsif node.semantic_fields.one?(YARP::NodeListField) and node.semantic_fields.none?(YARP::NodeKindField) -%>
return this.<%= node.semantic_fields.grep(YARP::NodeListField).first.name %>;
<%- elsif node.semantic_fields.none?(YARP::NodeListField) -%>
return new Node[] { <%= node.semantic_fields.grep(YARP::NodeKindField).map { |field| "this.#{field.name}" }.join(', ') %> };
<%- else -%>
ArrayList<Node> childNodes = new ArrayList<>();
<%- node.fields.each do |field| -%>
<%- node.semantic_fields.each do |field| -%>
<%- case field -%>
<%- when YARP::NodeField, YARP::OptionalNodeField -%>
childNodes.add(this.<%= field.name %>);
Expand Down
4 changes: 4 additions & 0 deletions templates/lib/yarp/serialize.rb.erb
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ module YARP
def load_nodes
raise "Invalid serialization" if io.read(4) != "YARP"
raise "Invalid serialization" if io.read(3).unpack("C3") != [MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION]
only_semantic_fields = io.read(1).unpack1("C")
unless only_semantic_fields == 0
raise "Invalid serialization (location fields must be included but are not)"
end

@encoding = load_encoding
@input = input.force_encoding(@encoding).freeze
Expand Down
4 changes: 4 additions & 0 deletions templates/src/serialize.c.erb
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,18 @@ yp_serialize_node(yp_parser_t *parser, yp_node_t *node, yp_buffer_t *buffer) {
yp_buffer_append_u32(buffer, yp_sizet_to_u32(((yp_<%= node.human %>_t *)node)-><%= field.name %>.ids[index]));
}
<%- when YARP::LocationField -%>
<%- if field.should_be_serialized? -%>
yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
<%- end -%>
<%- when YARP::OptionalLocationField -%>
<%- if field.should_be_serialized? -%>
if (((yp_<%= node.human %>_t *)node)-><%= field.name %>.start == NULL) {
yp_buffer_append_u8(buffer, 0);
} else {
yp_buffer_append_u8(buffer, 1);
yp_serialize_location(parser, &((yp_<%= node.human %>_t *)node)-><%= field.name %>, buffer);
}
<%- end -%>
<%- when YARP::UInt32Field -%>
yp_buffer_append_u32(buffer, ((yp_<%= node.human %>_t *)node)-><%= field.name %>);
<%- when YARP::FlagsField -%>
Expand Down
22 changes: 22 additions & 0 deletions templates/template.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
module YARP
COMMON_FLAGS = 2

SERIALIZE_ONLY_SEMANTICS_FIELDS = ENV.fetch("YARP_SERIALIZE_ONLY_SEMANTICS_FIELDS", false)

# This represents a field on a node. It contains all of the necessary
# information to template out the code for that field.
class Field
Expand All @@ -15,6 +17,14 @@ class Field
def initialize(name:, type:, **options)
@name, @type, @options = name, type, options
end

def semantic_field?
true
end

def should_be_serialized?
SERIALIZE_ONLY_SEMANTICS_FIELDS ? semantic_field? : true
end
end

# Some node fields can be specialized if they point to a specific kind of
Expand Down Expand Up @@ -122,6 +132,10 @@ def java_type

# This represents a field on a node that is a location.
class LocationField < Field
def semantic_field?
options[:semantic_field] || false
end

def rbs_class
"Location"
end
Expand All @@ -133,6 +147,10 @@ def java_type

# This represents a field on a node that is a location that is optional.
class OptionalLocationField < Field
def semantic_field?
options[:semantic_field] || false
end

def rbs_class
"Location?"
end
Expand Down Expand Up @@ -192,6 +210,10 @@ def initialize(config)
@comment = config.fetch("comment")
end

def semantic_fields
@semantic_fields ||= @fields.select(&:semantic_field?)
end

# Should emit serialized length of node so implementations can skip
# the node to enable lazy parsing.
def needs_serialized_length?
Expand Down

0 comments on commit fc5cf2d

Please sign in to comment.