-
Notifications
You must be signed in to change notification settings - Fork 262
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Set avro schema configuration in format bundle
Rather than managing the avro reader schema configuration in the input format getSplits method, it needs to be managed when creating the format bundle. Otherwise a crunch pipeline that has multiple inputs (kite views) with different schemas will not see the correct reader schemas. Note that the test only demonstrates the problem when also upgrading to crunch 0.13.0 (which is not part of this commit). This is due to CRUNCH-551 which is a fix for a problem in crunch that hides the current issue (at least in the scenario of the test) in versions before crunch-0.13.0. A test was also added to verify the behaviour with plain map/reduce to ensure that this continues to work as expected.
- Loading branch information
1 parent
1070ff9
commit 46068dc
Showing
5 changed files
with
222 additions
and
86 deletions.
There are no files selected for viewing
55 changes: 55 additions & 0 deletions
55
...a/kite-data-core/src/main/java/org/kitesdk/data/spi/filesystem/AvroConfigurationUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/** | ||
* Copyright 2014 Cloudera Inc. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.kitesdk.data.spi.filesystem; | ||
|
||
import org.apache.avro.Schema; | ||
import org.apache.avro.generic.GenericData; | ||
import org.apache.avro.hadoop.io.AvroSerialization; | ||
import org.apache.hadoop.conf.Configuration; | ||
import org.apache.parquet.avro.AvroReadSupport; | ||
import org.kitesdk.compat.DynMethods; | ||
import org.kitesdk.data.Format; | ||
import org.kitesdk.data.Formats; | ||
import org.kitesdk.data.spi.DataModelUtil; | ||
|
||
public class AvroConfigurationUtil { | ||
|
||
// Constant from AvroJob copied here so we can set it on the Configuration | ||
// given to this class. | ||
private static final String AVRO_SCHEMA_INPUT_KEY = "avro.schema.input.key"; | ||
|
||
// this is required for 1.7.4 because setDataModelClass is not available | ||
private static final DynMethods.StaticMethod setModel = | ||
new DynMethods.Builder("setDataModelClass") | ||
.impl(AvroSerialization.class, Configuration.class, Class.class) | ||
.defaultNoop() | ||
.buildStatic(); | ||
|
||
public static void configure(Configuration conf, Format format, Schema schema, Class<?> type) { | ||
GenericData model = DataModelUtil.getDataModelForType(type); | ||
if (Formats.AVRO.equals(format)) { | ||
setModel.invoke(conf, model.getClass()); | ||
conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString()); | ||
|
||
} else if (Formats.PARQUET.equals(format)) { | ||
// TODO: update to a version of Parquet with setAvroDataSupplier | ||
//AvroReadSupport.setAvroDataSupplier(conf, | ||
// DataModelUtil.supplierClassFor(model)); | ||
AvroReadSupport.setAvroReadSchema(conf, schema); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.