记录 RedisTemplate.executePipelined 使用问题

需求，向redis写入9000万个key

第一个版本(关键代码)

/**
 * First version of the job that writes one Redis key per (user id, type) pair.
 *
 * <p>For each of the three types a task is submitted to the thread pool, and that task
 * issues every SET for the whole user id list inside a single {@code executePipelined}
 * call. This is the version the surrounding notes report as slow and prone to
 * running out of memory.
 */
@Slf4j
@Component("job2")
public class ToRedis2 implements IJob {

    // Number of SET commands issued so far; incremented from the pipeline callbacks.
    private AtomicLong count = new AtomicLong(0);
    // Not read or written in the code shown here.
    private Long oldCount=0L;
    // User ids to write. A LinkedList, so every get(int) call walks the list node by node.
    private List<String> userIdList = new LinkedList<>();

    // Fixed pool with four threads per available processor.
    private ExecutorService es = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 4);

    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    // Bound to the "user.limit" property (default 100000); not read in the code shown here.
    @Setter
    @Getter
    @Value("${user.limit:100000}")
    private volatile int userLimit;

    // Neither thread field is used in the code shown here.
    private Thread td;
    private Thread c;

    // When set to true, the write loops log "job2 stop" and exit.
    private AtomicBoolean stop=new AtomicBoolean(false);

    /**
     * Loads the user ids on first use, then submits one pipelined write task per type.
     *
     * <p>NOTE(review): {@code readFile} and {@code save} are defined elsewhere; presumably
     * {@code readFile} fills {@code userIdList} from the given file - confirm against
     * their definitions.
     *
     * @throws IOException declared for the file-loading step
     */
    private void toRedis() throws IOException {
        String root = System.getProperty("user.dir") + "/2021";
        if (userIdList.isEmpty()) {
            // User id file
            String filePath = root + "/user.csv";
            readFile(filePath, 0, userIdList);
            save();
        }
        String type[] = {"r1", "r2", "r3"};
        for (String t : type) {
            es.execute(()->{
                redisTemplate.executePipelined((RedisCallback<Object>) redisConnection -> {
                    // Writing a very large data set through a single pipeline easily causes an
                    // out-of-memory error; the pressure comes mainly from receiving the results Redis returns.
                    // Build the data and write it to Redis.
                    for (int i = 0; i < userIdList.size(); i++) {
                        if(stop.get()){
                            log.info("job2 stop");
                            break;
                        }
                        final int inx=i;
                        String charset = "UTF-8";
                        // userIdList.get(inx) is the performance hot spot: indexed access on a LinkedList is a traversal.
                        String key = "app:xxx:202105:userid_" + userIdList.get(inx) + ":" + t;
                        try {
                            // The value is the absolute value of a freshly generated random int, as a string.
                            redisConnection.set(key.getBytes(charset), String.valueOf(Math.abs(new Random().nextInt())).getBytes(charset));
                            count.incrementAndGet();
                        } catch (UnsupportedEncodingException e) {
                            log.error("编码失败", e);
                        }
                    }
                    return null;
                });
            });
        }
    }

}

第二个版本(关键代码)


/**
 * Second version of the job that writes one Redis key per (user id, type) pair.
 *
 * <p>Compared with the first version, the user ids are held in an {@link ArrayList}
 * (constant-time indexed access) and the keys are sent to Redis in bounded batches,
 * one {@code executePipelined} call per batch, so the client never has to hold the
 * replies for the whole data set at once.
 */
@Slf4j
@Component("job2")
public class ToRedis2 implements IJob {

    // Number of keys written so far; also used as the value stored under each key.
    private AtomicLong count = new AtomicLong(0);
    // Not read or written in the code shown here.
    private Long oldCount=0L;
    // User ids to write; ArrayList so that get(int) is a direct array access.
    private List<String> userIdList = new ArrayList<>();

    // Fixed pool with four threads per available processor.
    private ExecutorService es = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 4);

    @Autowired
    private RedisTemplate<String, String> redisTemplate;

    // Bound to the "user.limit" property (default 100000); not read in the code shown here.
    @Setter
    @Getter
    @Value("${user.limit:100000}")
    private volatile int userLimit;
    // Number of leading user ids to skip (property "user.skip-count", default 0).
    @Setter
    @Getter
    @Value("${user.skip-count:0}")
    private volatile int skipCount;
    // Maximum number of keys sent per pipeline call (property "user.batch-count", default 10000).
    @Setter
    @Getter
    @Value("${user.batch-count:10000}")
    private volatile int batchCount;

    // When set to true, the write loops log "job2 stop" and exit without flushing.
    private AtomicBoolean stop=new AtomicBoolean(false);

    /**
     * Loads the user ids on first use, then submits one batching write task per type.
     *
     * <p>Each task walks the user id list, skips the first {@code skipCount} entries,
     * collects keys into a buffer and flushes the buffer through
     * {@link #executePipelined(List)} whenever it reaches the batch size or the last
     * user id has been added.
     *
     * <p>NOTE(review): {@code readFile} is defined elsewhere; presumably it fills
     * {@code userIdList} from the given file - confirm against its definition.
     *
     * @throws IOException declared for the file-loading step
     */
    private void toRedis() throws IOException {
        String root = System.getProperty("user.dir") + "/2021";
        if (userIdList.isEmpty()) {
            // User id file
            String filePath = root + "/user.txt";
            readFile(filePath, 0, userIdList);
            // save() from the first version is disabled here.
        }

        // Key suffixes; one full pass over the user ids is made per suffix.
        String[] type = {"r1", "r2", "r3"};
        for (String t : type) {
            // Build the data and write it to Redis.
            es.execute(() -> {
                List<String> info = new ArrayList<>();
                for (int i = 0; i < userIdList.size(); i++) {
                    if (i < skipCount) {
                        continue;
                    }
                    if (stop.get()) {
                        log.info("job2 stop");
                        break;
                    }
                    String key = "app:xxx:202105:userid_" + userIdList.get(i) + ":" + t;
                    info.add(key);
                    boolean lastUser = i == userIdList.size() - 1;
                    // ">=" rather than "==": batchCount can be lowered at runtime through its
                    // setter, and the buffer must still be flushed once it is over the new limit.
                    if (info.size() >= getBatchCount() || lastUser) {
                        if (!stop.get()) {
                            executePipelined(info);
                            info.clear();
                        }
                    }
                }
            });
        }
    }

    /**
     * Writes one batch of keys to Redis in a single pipeline.
     *
     * <p>Each key is stored with the current value of the running counter as its value.
     * Keys are skipped once the stop flag is set.
     *
     * @param info keys to write; the caller clears the list after this call returns
     */
    private void executePipelined(List<String> info) {
        RedisSerializer<String> serializer = redisTemplate.getStringSerializer();
        redisTemplate.executePipelined((RedisCallback<String>) connection -> {
            info.forEach((key) -> {
                if (!stop.get()) {
                    long c = count.incrementAndGet();
                    connection.set(serializer.serialize(key), serializer.serialize(String.valueOf(c)));
                }
            });
            // A pipelined callback must return null; the replies are collected by the template.
            return null;
        }, serializer);
    }

}

两版对比

第一个版本出现了内存溢出问题以及越运行处理越慢问题,9000万数据预计需要30多个小时
第二个版本没有出现内存溢出，性能比第一个版本提高上百倍，9000万数据只用了10多分钟

问题分析

第一个版本性能问题主要是采用了LinkedList 这个数据结构，这个数据结构的特性是增、删快，但是按索引读取是线性的。当时被get方法迷惑了，看了源码才发现，这个get实际上进行的是遍历操作，虽然有优化（从离目标较近的一端开始遍历），但是当数据量非常庞大时就会有性能问题

   // LinkedList get 方法源码
    /**
     * The get method of LinkedList.
     * Validates the index with checkElementIndex, then delegates to node(index)
     * to locate the node and returns that node's item.
     */
    public E get(int index) {
        checkElementIndex(index);
        return node(index).item;
    }

   /**
    * The lookup that get actually calls.
    * Walks forward from first when index is below size / 2, otherwise walks
    * backward from last, and returns the node reached.
    */
    Node<E> node(int index) {
        // assert isElementIndex(index);

        if (index < (size >> 1)) {
            Node<E> x = first;
            // Loop traversal; becomes a performance bottleneck on very large lists
            for (int i = 0; i < index; i++)
                x = x.next;
            return x;
        } else {
            Node<E> x = last;
            // Loop traversal; becomes a performance bottleneck on very large lists
            for (int i = size - 1; i > index; i--)
                x = x.prev;
            return x;
        }
    }

第一个版本内存溢出问题分析：由于超大数据集的循环整个放在一次管道调用内，所有命令的返回结果都要在客户端内存中累积，直到管道结束才统一返回，结果集过大时就会存不下而造成内存溢出
第二个版本性能分析
1、超大数据集改为ArrayList，在执行get的时候就不会有循环查找问题，由于ArrayList 底层结构是数组，所以通过索引访问时是O(1)的速度，这个比LinkedList 的平均O(n)要快很多
2、分批处理数据，此处将数据分为10000条每批，这样就不会因为接收redis返回结果而造成内存溢出问题

PS:关于内存溢出分析，只是从代码、原理角度分析，可能分析的不准确，仅做参考