--- /dev/null
+/*******************************************************************************
+ * Copyright (c) 2007, 2010 Association for Decentralized Information Management
+ * in Industry THTH ry.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * VTT Technical Research Centre of Finland - initial API and implementation
+ *******************************************************************************/
+package org.simantics.db.tests.api.story.misc;
+
+import java.io.BufferedInputStream;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.Charset;
+import java.util.Arrays;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+import org.simantics.db.Resource;
+import org.simantics.db.Session;
+import org.simantics.db.WriteGraph;
+import org.simantics.db.common.request.WriteRequest;
+import org.simantics.db.exception.DatabaseException;
+import org.simantics.db.service.LifecycleSupport;
+import org.simantics.db.testing.annotation.Fails;
+import org.simantics.db.testing.common.Tests;
+import org.simantics.db.tests.common.Configuration;
+import org.simantics.layer0.Layer0;
+
+public class TextIndexingTest {
+
+ static String host = Configuration.get().host;
+ static int port = Configuration.get().port;
+ static String dataFileName = Configuration.get().textIndexingFile;
+ Session session;
+ Text text;
+ Resource root;
+ Resource[] relations = new Resource[128];
+
+ @Before
+ public void setUp() throws Exception {
+// SessionFactory factory = new SessionFactory(host, port);
+// session = factory.create();
+ session = Tests.getTestHandler().getSession();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ LifecycleSupport support = session.getService(LifecycleSupport.class);
+ support.close();
+ }
+
+ class SetUp extends WriteRequest {
+
+ @Override
+ public void perform(WriteGraph g) throws DatabaseException {
+ root = g.newResource();
+ Layer0 l0 = Layer0.getInstance(g);
+ for(int i=0;i<128;++i) {
+ relations[i] = g.newResource();
+ Resource inv = g.newResource();
+ g.claim(relations[i], l0.InverseOf, inv);
+ g.claim(relations[i], l0.SubrelationOf, l0.IsRelatedTo);
+ g.claim(inv, l0.SubrelationOf, l0.IsWeaklyRelatedTo);
+ }
+ }
+
+ }
+
+ boolean finished = false;
+
+ class WriteTrie extends WriteRequest {
+
+ int count = 0;
+ int newWords = 0;
+
+ @Override
+ public void perform(WriteGraph g) throws DatabaseException {
+
+ try {
+
+ //System.out.println("T");
+ byte[] word;
+ Layer0 b = Layer0.getInstance(g);
+ long startTime = System.nanoTime();
+ while( (word = text.getWord()) != null ) {
+
+ if(word == PEND)
+ continue;
+
+ Resource cur = root;
+ for(int i=0;i<word.length;++i) {
+ Resource relation = relations[word[i]];
+ Resource temp = g.getPossibleObject(cur, relation);
+ if(temp == null) {
+ temp = g.newResource();
+ g.claim(cur, relation, temp);
+ }
+ cur = temp;
+ }
+
+ Resource name = g.getPossibleObject(cur, b.HasName);
+ if(name == null) {
+ name = g.newResource();
+ g.claimValue(name, new String(word));
+ g.claim(cur, b.HasName, name);
+ ++newWords;
+ }
+
+ ++count;
+ if((count % 2000) == 0)
+ System.out.println(count + ", " + newWords);
+ }
+ long endTime = System.nanoTime();
+ System.out.println("End");
+ System.out.println(count + " words, " + newWords + " unique words");
+ System.out.println((endTime-startTime)*1e-6/count + " ms / word");
+
+ finished = true;
+
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ }
+
+ @Fails
+ @Test
+ public void test() {
+ try {
+ text = new Text(new BufferedInputStream(new FileInputStream(dataFileName)));
+ session.syncRequest(new SetUp());
+ System.out.println("Start");
+ WriteTrie wt = new WriteTrie();
+ while(!finished)
+ session.syncRequest(wt);
+ System.out.println("Ok");
+ } catch (FileNotFoundException e) {
+ // TODO Auto-generated catch block
+ e.printStackTrace();
+ } catch (DatabaseException e) {
+ e.printStackTrace();
+ }
+ }
+
+ final static byte[] PEND = new byte[0];
+
+ static class Text {
+ InputStream s;
+ byte[] temp = new byte[1024];
+
+ public Text(InputStream s) {
+ this.s = s;
+ }
+
+ byte[] getWord() throws IOException {
+ int b;
+ while( !Character.isLetter(b = s.read()) && b != -1 && b != '\n');
+ if(b == -1)
+ return null;
+ if(b == '\n')
+ return PEND;
+ int i=0;
+ while( Character.isLetter(b) ) {
+ temp[i] = (byte)Character.toLowerCase(b);
+ ++i;
+ b = s.read();
+ }
+ return Arrays.copyOf(temp, i);
+ }
+ }
+
+}