Lokasi ngalangkungan proxy:   [ UP ]  
[Ngawartoskeun bug]   [Panyetelan cookie]                
Skip to content
This repository was archived by the owner on Feb 24, 2026. It is now read-only.

Commit 694abbb

Browse files
authored
feat: Add support for flexible column name in JsonStreamWriter (#1786)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [X] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/java-bigquerystorage/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [X] Ensure the tests and linter pass - [X] Code coverage does not decrease (if any source code was changed) - [X] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> ☕️ If you write sample code, please follow the [samples format]( https://github.com/GoogleCloudPlatform/java-docs-samples/blob/main/SAMPLE_FORMAT.md).
1 parent c24c14f commit 694abbb

8 files changed

Lines changed: 265 additions & 18 deletions

File tree

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/BQTableSchemaToProtoDescriptor.java

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import com.google.common.collect.ImmutableMap;
2121
import com.google.protobuf.DescriptorProtos.DescriptorProto;
2222
import com.google.protobuf.DescriptorProtos.FieldDescriptorProto;
23+
import com.google.protobuf.DescriptorProtos.FieldOptions;
2324
import com.google.protobuf.DescriptorProtos.FileDescriptorProto;
2425
import com.google.protobuf.Descriptors;
2526
import com.google.protobuf.Descriptors.Descriptor;
@@ -92,7 +93,11 @@ private static Descriptor convertBQTableSchemaToProtoDescriptorImpl(
9293
List<FieldDescriptorProto> fields = new ArrayList<FieldDescriptorProto>();
9394
int index = 1;
9495
for (TableFieldSchema BQTableField : BQTableSchema.getFieldsList()) {
95-
String currentScope = scope + "__" + BQTableField.getName();
96+
String scopeName =
97+
BigQuerySchemaUtil.isProtoCompatible(BQTableField.getName())
98+
? BQTableField.getName()
99+
: BigQuerySchemaUtil.generatePlaceholderFieldName(BQTableField.getName());
100+
String currentScope = scope + "__" + scopeName;
96101
if (BQTableField.getType() == TableFieldSchema.Type.STRUCT) {
97102
ImmutableList<TableFieldSchema> fieldList =
98103
ImmutableList.copyOf(BQTableField.getFieldsList());
@@ -137,19 +142,26 @@ private static FieldDescriptorProto convertBQTableFieldToProtoField(
137142
TableFieldSchema BQTableField, int index, String scope) {
138143
TableFieldSchema.Mode mode = BQTableField.getMode();
139144
String fieldName = BQTableField.getName().toLowerCase();
145+
146+
FieldDescriptorProto.Builder fieldDescriptor =
147+
FieldDescriptorProto.newBuilder()
148+
.setName(fieldName)
149+
.setNumber(index)
150+
.setLabel((FieldDescriptorProto.Label) BQTableSchemaModeMap.get(mode));
151+
140152
if (BQTableField.getType() == TableFieldSchema.Type.STRUCT) {
141-
return FieldDescriptorProto.newBuilder()
142-
.setName(fieldName)
143-
.setTypeName(scope)
144-
.setLabel((FieldDescriptorProto.Label) BQTableSchemaModeMap.get(mode))
145-
.setNumber(index)
146-
.build();
153+
fieldDescriptor.setTypeName(scope);
154+
} else {
155+
fieldDescriptor.setType(
156+
(FieldDescriptorProto.Type) BQTableSchemaTypeMap.get(BQTableField.getType()));
157+
}
158+
159+
// Sets the columnName annotation when the field name is not proto compatible.
160+
if (!BigQuerySchemaUtil.isProtoCompatible(fieldName)) {
161+
fieldDescriptor.setName(BigQuerySchemaUtil.generatePlaceholderFieldName(fieldName));
162+
fieldDescriptor.setOptions(
163+
FieldOptions.newBuilder().setExtension(AnnotationsProto.columnName, fieldName).build());
147164
}
148-
return FieldDescriptorProto.newBuilder()
149-
.setName(fieldName)
150-
.setType((FieldDescriptorProto.Type) BQTableSchemaTypeMap.get(BQTableField.getType()))
151-
.setLabel((FieldDescriptorProto.Label) BQTableSchemaModeMap.get(mode))
152-
.setNumber(index)
153-
.build();
165+
return fieldDescriptor.build();
154166
}
155167
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery.storage.v1;
17+
18+
import com.google.protobuf.Descriptors.FieldDescriptor;
19+
import java.nio.charset.StandardCharsets;
20+
import java.util.Base64;
21+
import java.util.regex.Pattern;
22+
23+
public class BigQuerySchemaUtil {
24+
25+
private static final String PROTO_COMPATIBLE_NAME_REGEXP = "[A-Za-z_][A-Za-z0-9_]*";
26+
private static final String PLACEHOLDER_FILED_NAME_PREFIX = "col_";
27+
private static final Pattern PROTO_COMPATIBLE_NAME_PATTERN =
28+
Pattern.compile(PROTO_COMPATIBLE_NAME_REGEXP);
29+
30+
/**
31+
* * Checks if the field name is compatible with proto field naming convention.
32+
*
33+
* @param fieldName name for the field
34+
* @return true if the field name is comptaible with proto naming convention, otherwise, returns
35+
* false.
36+
*/
37+
public static boolean isProtoCompatible(String fieldName) {
38+
return PROTO_COMPATIBLE_NAME_PATTERN.matcher(fieldName).matches();
39+
}
40+
41+
/**
42+
* * Generates a placeholder name that consists of a prefix + base64 encoded field name. We
43+
* replace all dashes with underscores as they are not allowed for proto field names.
44+
*
45+
* @param fieldName name for the field
46+
* @return the generated placeholder field name
47+
*/
48+
public static String generatePlaceholderFieldName(String fieldName) {
49+
return PLACEHOLDER_FILED_NAME_PREFIX
50+
+ Base64.getUrlEncoder()
51+
.withoutPadding()
52+
.encodeToString(fieldName.getBytes(StandardCharsets.UTF_8))
53+
.replace('-', '_');
54+
}
55+
56+
/**
57+
* * Gets the user-facing field name from the descriptor
58+
*
59+
* @param fieldDescriptor
60+
* @return columnName annotation if present, otherwise return the field name.
61+
*/
62+
public static String getFieldName(FieldDescriptor fieldDescriptor) {
63+
return fieldDescriptor.getOptions().hasExtension(AnnotationsProto.columnName)
64+
? fieldDescriptor.getOptions().getExtension(AnnotationsProto.columnName)
65+
: fieldDescriptor.getName();
66+
}
67+
}

google-cloud-bigquerystorage/src/main/java/com/google/cloud/bigquery/storage/v1/JsonToProtoMessage.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,9 +197,15 @@ private static DynamicMessage convertJsonToProtoMessageImpl(
197197
String jsonName = jsonNames[i];
198198
// We want lowercase here to support case-insensitive data writes.
199199
// The protobuf descriptor that is used is assumed to have all lowercased fields
200-
String jsonLowercaseName = jsonName.toLowerCase();
200+
String jsonFieldLocator = jsonName.toLowerCase();
201+
202+
// If jsonName is not compatible with proto naming convention, we should look it up by its
203+
// placeholder name.
204+
if (!BigQuerySchemaUtil.isProtoCompatible(jsonFieldLocator)) {
205+
jsonFieldLocator = BigQuerySchemaUtil.generatePlaceholderFieldName(jsonFieldLocator);
206+
}
201207
String currentScope = jsonScope + "." + jsonName;
202-
FieldDescriptor field = protoSchema.findFieldByName(jsonLowercaseName);
208+
FieldDescriptor field = protoSchema.findFieldByName(jsonFieldLocator);
203209
if (field == null && !ignoreUnknownFields) {
204210
throw new Exceptions.JsonDataHasUnknownFieldException(currentScope);
205211
} else if (field == null) {
@@ -209,7 +215,7 @@ private static DynamicMessage convertJsonToProtoMessageImpl(
209215
if (tableSchema != null) {
210216
// protoSchema is generated from tableSchema so their field ordering should match.
211217
fieldSchema = tableSchema.get(field.getIndex());
212-
if (!fieldSchema.getName().toLowerCase().equals(field.getName())) {
218+
if (!fieldSchema.getName().toLowerCase().equals(BigQuerySchemaUtil.getFieldName(field))) {
213219
throw new ValidationException(
214220
"Field at index "
215221
+ field.getIndex()

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/BQTableSchemaToProtoDescriptorTest.java

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ public void testSimpleTypes() throws Exception {
107107

108108
@Test
109109
public void testStructSimple() throws Exception {
110-
final TableFieldSchema StringType =
110+
final TableFieldSchema stringType =
111111
TableFieldSchema.newBuilder()
112112
.setType(TableFieldSchema.Type.STRING)
113113
.setMode(TableFieldSchema.Mode.NULLABLE)
@@ -118,7 +118,7 @@ public void testStructSimple() throws Exception {
118118
.setType(TableFieldSchema.Type.STRUCT)
119119
.setMode(TableFieldSchema.Mode.NULLABLE)
120120
.setName("test_field_type")
121-
.addFields(0, StringType)
121+
.addFields(0, stringType)
122122
.build();
123123
final TableSchema tableSchema = TableSchema.newBuilder().addFields(0, tableFieldSchema).build();
124124
final Descriptor descriptor =
@@ -509,4 +509,32 @@ public void testDescriptorReuseDuringCreation() throws Exception {
509509
assertEquals(descriptorToCount.get("root__reuse_lvl1__reuse_lvl2").intValue(), 3);
510510
isDescriptorEqual(descriptor, ReuseRoot.getDescriptor());
511511
}
512+
513+
@Test
514+
public void testNestedFlexibleFieldName() throws Exception {
515+
final TableFieldSchema stringField =
516+
TableFieldSchema.newBuilder()
517+
.setType(TableFieldSchema.Type.STRING)
518+
.setMode(TableFieldSchema.Mode.NULLABLE)
519+
.setName("str-列")
520+
.build();
521+
final TableFieldSchema intField =
522+
TableFieldSchema.newBuilder()
523+
.setType(TableFieldSchema.Type.INT64)
524+
.setMode(TableFieldSchema.Mode.NULLABLE)
525+
.setName("int-列")
526+
.build();
527+
final TableFieldSchema nestedField =
528+
TableFieldSchema.newBuilder()
529+
.setType(TableFieldSchema.Type.STRUCT)
530+
.setMode(TableFieldSchema.Mode.NULLABLE)
531+
.setName("nested-列")
532+
.addFields(0, intField)
533+
.build();
534+
final TableSchema tableSchema =
535+
TableSchema.newBuilder().addFields(0, stringField).addFields(1, nestedField).build();
536+
final Descriptor descriptor =
537+
BQTableSchemaToProtoDescriptor.convertBQTableSchemaToProtoDescriptor(tableSchema);
538+
isDescriptorEqual(descriptor, TestNestedFlexibleFieldName.getDescriptor());
539+
}
512540
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigquery.storage.v1;
17+
18+
import com.google.cloud.bigquery.storage.test.SchemaTest.TestNestedFlexibleFieldName;
19+
import com.google.protobuf.Descriptors.Descriptor;
20+
import java.util.Arrays;
21+
import java.util.List;
22+
import junit.framework.TestCase;
23+
import org.junit.Test;
24+
import org.junit.runner.RunWith;
25+
import org.junit.runners.JUnit4;
26+
27+
@RunWith(JUnit4.class)
28+
public class BigQuerySchemaUtilTest extends TestCase {
29+
30+
@Test
31+
public void testIsProtoCompatible() {
32+
List<String> protoCompatibleNames = Arrays.asList("col_1", "name", "_0_");
33+
List<String> protoIncompatibleNames = Arrays.asList("0_col", "()", "列", "a-1");
34+
protoCompatibleNames.stream()
35+
.forEach(
36+
name -> {
37+
assertTrue(BigQuerySchemaUtil.isProtoCompatible(name));
38+
});
39+
protoIncompatibleNames.stream()
40+
.forEach(
41+
name -> {
42+
assertFalse(BigQuerySchemaUtil.isProtoCompatible(name));
43+
});
44+
}
45+
46+
public void testGeneratePlaceholderFieldName() {
47+
assertEquals("col_c3RyLeWIlw", BigQuerySchemaUtil.generatePlaceholderFieldName("str-列"));
48+
// Base64 url encodes "~/~/" to "fi9-Lw", we replaced - with _ to be proto compatible.
49+
assertEquals("col_fi9_Lw", BigQuerySchemaUtil.generatePlaceholderFieldName("~/~/"));
50+
}
51+
52+
public void testGetFieldName() {
53+
// Test get name from annotations.
54+
Descriptor flexibleDescriptor = TestNestedFlexibleFieldName.getDescriptor();
55+
assertEquals("str-列", BigQuerySchemaUtil.getFieldName(flexibleDescriptor.getFields().get(0)));
56+
assertEquals(
57+
"nested-列", BigQuerySchemaUtil.getFieldName(flexibleDescriptor.getFields().get(1)));
58+
59+
// Test get name without annotations.
60+
Descriptor descriptor = TestNestedFlexibleFieldName.getDescriptor();
61+
assertEquals("int32_value", BigQuerySchemaUtil.getFieldName(descriptor.getFields().get(0)));
62+
assertEquals("int64_value", BigQuerySchemaUtil.getFieldName(descriptor.getFields().get(1)));
63+
}
64+
}

google-cloud-bigquerystorage/src/test/java/com/google/cloud/bigquery/storage/v1/JsonStreamWriterTest.java

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import com.google.api.gax.grpc.testing.MockServiceHelper;
3131
import com.google.cloud.bigquery.storage.test.JsonTest;
3232
import com.google.cloud.bigquery.storage.test.SchemaTest;
33+
import com.google.cloud.bigquery.storage.test.Test.FlexibleType;
3334
import com.google.cloud.bigquery.storage.test.Test.FooType;
3435
import com.google.cloud.bigquery.storage.test.Test.UpdatedFooType;
3536
import com.google.cloud.bigquery.storage.v1.Exceptions.AppendSerializtionError;
@@ -212,6 +213,54 @@ public void testSingleAppendSimpleJson() throws Exception {
212213
}
213214
}
214215

216+
@Test
217+
public void testFlexibleColumnAppend() throws Exception {
218+
TableFieldSchema field =
219+
TableFieldSchema.newBuilder()
220+
.setType(TableFieldSchema.Type.STRING)
221+
.setMode(TableFieldSchema.Mode.NULLABLE)
222+
.setName("test-列")
223+
.build();
224+
TableSchema tableSchema = TableSchema.newBuilder().addFields(0, field).build();
225+
FlexibleType expectedProto = FlexibleType.newBuilder().setColDGVzdC3LiJc("allen").build();
226+
JSONObject flexible = new JSONObject();
227+
flexible.put("test-列", "allen");
228+
JSONArray jsonArr = new JSONArray();
229+
jsonArr.put(flexible);
230+
231+
try (JsonStreamWriter writer =
232+
getTestJsonStreamWriterBuilder(TEST_STREAM, tableSchema).setTraceId("test:empty").build()) {
233+
234+
testBigQueryWrite.addResponse(
235+
AppendRowsResponse.newBuilder()
236+
.setAppendResult(
237+
AppendRowsResponse.AppendResult.newBuilder().setOffset(Int64Value.of(0)).build())
238+
.build());
239+
240+
ApiFuture<AppendRowsResponse> appendFuture = writer.append(jsonArr);
241+
assertEquals(0L, appendFuture.get().getAppendResult().getOffset().getValue());
242+
appendFuture.get();
243+
assertEquals(
244+
1,
245+
testBigQueryWrite
246+
.getAppendRequests()
247+
.get(0)
248+
.getProtoRows()
249+
.getRows()
250+
.getSerializedRowsCount());
251+
assertEquals(
252+
testBigQueryWrite
253+
.getAppendRequests()
254+
.get(0)
255+
.getProtoRows()
256+
.getRows()
257+
.getSerializedRows(0),
258+
expectedProto.toByteString());
259+
assertEquals(
260+
testBigQueryWrite.getAppendRequests().get(0).getTraceId(), "JsonWriter_test:empty");
261+
}
262+
}
263+
215264
@Test
216265
public void testSpecialTypeAppend() throws Exception {
217266
TableFieldSchema field =

google-cloud-bigquerystorage/src/test/proto/schemaTest.proto

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ syntax = "proto2";
22

33
package com.google.cloud.bigquery.storage.test;
44

5+
import "google/cloud/bigquery/storage/v1/annotations.proto";
6+
57
message SupportedTypes {
68
optional int32 int32_value = 1;
79
optional int64 int64_value = 2;
@@ -257,3 +259,15 @@ message AllowUnknownUnsupportedFields {
257259
message FakeFooType {
258260
optional int32 foo = 1;
259261
}
262+
263+
// Expected descriptor for a table with the columns "str-列" (string) and "nested-列" (struct).
// Each field is named "col_" + the unpadded URL-safe Base64 of the column name, and the original
// column name is kept in the column_name option.
message TestNestedFlexibleFieldName {
264+
optional string col_c3RyLeWIlw = 1
265+
[(.google.cloud.bigquery.storage.v1.column_name) = "str-列"];
266+
optional FlexibleNameField col_bmVzdGVkLeWIlw = 2
267+
[(.google.cloud.bigquery.storage.v1.column_name) = "nested-列"];
268+
}
269+
270+
// Nested message used by TestNestedFlexibleFieldName; its single field stands for the "int-列"
// column and keeps that name in the column_name option.
message FlexibleNameField {
271+
optional int64 col_aW50LeWIlw = 1
272+
[(.google.cloud.bigquery.storage.v1.column_name) = "int-列"];
273+
}

google-cloud-bigquerystorage/src/test/proto/test.proto

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ syntax = "proto2";
1717

1818
package com.google.cloud.bigquery.storage.test;
1919

20+
import "google/cloud/bigquery/storage/v1/annotations.proto";
21+
2022
enum TestEnum {
2123
TestEnum0 = 0;
2224
TestEnum1 = 1;
@@ -80,3 +82,8 @@ message DuplicateType {
8082
optional ComplicateType f3 = 3;
8183
optional ComplicateType f4 = 4;
8284
}
85+
86+
// Message with a single string field standing for the "test-列" column. The field name is
// "col_" + the unpadded URL-safe Base64 of the column name; the original name is kept in the
// column_name option.
message FlexibleType {
87+
optional string col_dGVzdC3liJc = 1
88+
[(.google.cloud.bigquery.storage.v1.column_name) = "test-列"];
89+
}

0 commit comments

Comments
 (0)