001    /*
002     * JBoss, Home of Professional Open Source.
003     * Copyright 2008, Red Hat Middleware LLC, and individual contributors
004     * as indicated by the @author tags. See the copyright.txt file in the
005     * distribution for a full listing of individual contributors.
006     *
007     * This is free software; you can redistribute it and/or modify it
008     * under the terms of the GNU Lesser General Public License as
009     * published by the Free Software Foundation; either version 2.1 of
010     * the License, or (at your option) any later version.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015     * Lesser General Public License for more details.
016     *
017     * You should have received a copy of the GNU Lesser General Public
018     * License along with this software; if not, write to the Free
019     * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
020     * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
021     */
022    package org.jboss.dna.sequencer.msoffice.excel;
023    
024    import java.io.IOException;
025    import java.io.InputStream;
026    import java.util.ArrayList;
027    import java.util.List;
028    import org.apache.poi.hssf.extractor.ExcelExtractor;
029    import org.apache.poi.hssf.usermodel.HSSFWorkbook;
030    import org.apache.poi.poifs.filesystem.POIFSFileSystem;
031    
032    /**
033     * @author Michael Trezzi
034     * @description Extracts data and some metadata from excel files
035     */
036    public class ExcelMetadataReader {
037    
038        public static ExcelMetadata instance( InputStream stream ) throws IOException {
039            ExcelMetadata metadata = new ExcelMetadata();
040            HSSFWorkbook wb = new HSSFWorkbook(new POIFSFileSystem(stream));
041            ExcelExtractor extractor = new ExcelExtractor(wb);
042    
043            extractor.setFormulasNotResults(true);
044            extractor.setIncludeSheetNames(false);
045            metadata.setText(extractor.getText());
046            List<String> sheets = new ArrayList<String>();
047            for (int i = 0; i < wb.getNumberOfSheets(); i++) {
048                sheets.add(wb.getSheetName(i));
049            }
050            metadata.setSheets(sheets);
051            return metadata;
052        }
053    }