2015-10-26 3 views

ответ

0

Этот Java-код у меня сработал; надеюсь, он будет полезен и другим.

import java.io.*; 
import java.util.*; 

import org.apache.avro.*; 
import org.apache.avro.generic.*; 
import org.apache.avro.file.*; 
import org.apache.avro.io.*; 

public class AvrotoTextFormatter 

{ 

public static void main (String args[]) throws Exception 

{ 

    InputStream in = null; 
    in = new FileInputStream(args[0]); 
    BufferedReader br; 
    BufferedInputStream inStream = new BufferedInputStream(in); 
    PrintWriter pr1 = new PrintWriter(args[1], "UTF-8"); 
    PrintWriter pr = new PrintWriter(args[2], "UTF-8"); 
    StringTokenizer st; 
    StringTokenizer st1; 
    int row_counter = 0; 
    String header_fields = ""; 
    String content_records = ""; 
    String sCurrentLine = ""; 


    GenericDatumReader<Object> reader = new GenericDatumReader<Object>(); 
    DataFileStream<Object> fileReader = new DataFileStream<Object>(inStream, reader); 



    pr1.println(fileReader.getSchema().getFields()); 

    pr1.close(); 

    br = new BufferedReader(new java.io.FileReader(args[1])); 


    while ((sCurrentLine = br.readLine()) != null) 
    { 
    st = new StringTokenizer(sCurrentLine," "); 
    while (st.hasMoreTokens()) 
    { 
     header_fields = header_fields + st.nextToken() + "|"; 
     st.nextToken(); 
     st.nextToken();    
    }   
    } 
    header_fields = header_fields.substring(1,header_fields.length()-1); 
    pr.println(header_fields); 


    File file = new File(args[0]); 
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(fileReader.getSchema()); 
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); 
    GenericRecord user = null; 
     while (dataFileReader.hasNext()) 
      { 
       content_records = ""; 
       user = dataFileReader.next(user); 
       st1 = new StringTokenizer(header_fields,"|"); 
       while (st1.hasMoreTokens()) 
      { 
       content_records = content_records + user.get(st1.nextToken()) + "|";    
       } 
    content_records = content_records.substring(0,content_records.length()-1); 
    pr.println(content_records); 
      } 
    fileReader.close(); 
    br.close(); 
    pr.close(); 

} 

} 
0

Нашёл сниппет на Spark, который делает то же самое быстрее и проще. Делюсь им — возможно, он будет полезен и другим.

import org.apache.avro.mapreduce.AvroKeyInputFormat 
import org.apache.avro.mapred.AvroKey 
import org.apache.hadoop.io.NullWritable 
import org.apache.avro.generic.GenericRecord

// Read the Avro file as generic records; each key wraps one record.
val avroRecords = sc.newAPIHadoopFile(
  "/sit/data/presentation/bbsbi/alayer/test/000000_0",
  classOf[AvroKeyInputFormat[GenericRecord]],
  classOf[AvroKey[GenericRecord]],
  classOf[NullWritable]).keys

// JSON text of every record (AvroKey.toString renders the wrapped datum).
val avroRdd = avroRecords.map(_.toString)

// Pipe-delimited values in schema field order. Values are read from the record
// by position instead of splitting its JSON text on "," and ":", which broke on
// values containing those characters and threw on unquoted values
// (numbers, null, booleans). A null value is rendered as the text "null".
val n = avroRecords.map { key =>
  val record = key.datum()
  val fieldCount = record.getSchema.getFields.size
  (0 until fieldCount).map(i => String.valueOf(record.get(i))).mkString("|")
}

// NOTE: collect brings every row to the driver; fine for small files only.
n.collect.foreach(println)
0

Приведённый ниже код у меня работает.

/**
 * Reads every record of an Avro container file into a JSON array.
 *
 * <p>Each datum is converted with its {@code toString()} and parsed into a
 * {@code JSONObject}; the objects are appended in file order.
 *
 * @param absFilePath path of the Avro data file to read
 * @return an array holding one JSON object per record; empty if the file has no records
 * @throws IOException if the file cannot be opened or read
 * @throws InterruptedException never thrown by this body; kept in the signature
 *         so that existing callers continue to compile
 */
private static JSONArray readJsonFromAvro(String absFilePath)throws IOException, 
InterruptedException 
{ 
    JSONArray jsonarray = new JSONArray(); 
    // Both resources are closed even when the Avro header cannot be parsed,
    // in which case the DataFileStream constructor throws.
    try (InputStream in = new BufferedInputStream(new FileInputStream(absFilePath)); 
         DataFileStream<Object> fileReader = 
             new DataFileStream<Object>(in, new GenericDatumReader<Object>())) 
    { 
        for (final Object datum : fileReader) { 
            jsonarray.put(new JSONObject(datum.toString())); 
        } 
    } 
    return jsonarray; 
} 
-1

Как запустить приведённую ниже программу? То есть какие три аргумента командной строки ей нужно передать?

import org.apache.avro.*; 
import org.apache.avro.generic.*; 
import org.apache.avro.file.*; 
import org.apache.avro.io.*; 

public class AvrotoTextFormatter 

{ 

public static void main (String args[]) throws Exception 

{ 

    InputStream in = null; 
    in = new FileInputStream(args[0]); 
    BufferedReader br; 
    BufferedInputStream inStream = new BufferedInputStream(in); 
    PrintWriter pr1 = new PrintWriter(args[1], "UTF-8"); 
    PrintWriter pr = new PrintWriter(args[2], "UTF-8"); 
    StringTokenizer st; 
    StringTokenizer st1; 
    int row_counter = 0; 
    String header_fields = ""; 
    String content_records = ""; 
    String sCurrentLine = ""; 


    GenericDatumReader<Object> reader = new GenericDatumReader<Object>(); 
    DataFileStream<Object> fileReader = new DataFileStream<Object>(inStream, reader); 



    pr1.println(fileReader.getSchema().getFields()); 

    pr1.close(); 

    br = new BufferedReader(new java.io.FileReader(args[1])); 


    while ((sCurrentLine = br.readLine()) != null) 
    { 
    st = new StringTokenizer(sCurrentLine," "); 
    while (st.hasMoreTokens()) 
    { 
     header_fields = header_fields + st.nextToken() + "|"; 
     st.nextToken(); 
     st.nextToken();    
    }   
    } 
    header_fields = header_fields.substring(1,header_fields.length()-1); 
    pr.println(header_fields); 


    File file = new File(args[0]); 
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(fileReader.getSchema()); 
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, datumReader); 
    GenericRecord user = null; 
     while (dataFileReader.hasNext()) 
      { 
       content_records = ""; 
       user = dataFileReader.next(user); 
       st1 = new StringTokenizer(header_fields,"|"); 
       while (st1.hasMoreTokens()) 
      { 
       content_records = content_records + user.get(st1.nextToken()) + "|";    
       } 
    content_records = content_records.substring(0,content_records.length()-1); 
    pr.println(content_records); 
      } 
    fileReader.close(); 
    br.close(); 
    pr.close(); 

} 

} 
Смежные вопросы